Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the number of allocations needed to find a specific child/sibling #119

Closed
56 changes: 56 additions & 0 deletions src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,34 @@ impl<L: Language> Iterator for SyntaxNodeChildren<L> {
}
}

impl<L: Language> SyntaxNodeChildren<L> {
pub fn by_kind<'a>(
self,
matcher: &'a impl Fn(SyntaxKind) -> bool,
Veykril marked this conversation as resolved.
Show resolved Hide resolved
) -> SyntaxNodeChildrenByKind<'a, L> {
SyntaxNodeChildrenByKind { raw: self.raw.by_kind(matcher), _p: PhantomData }
}
}

#[derive(Clone)]
pub struct SyntaxNodeChildrenByKind<'a, L: Language> {
raw: cursor::SyntaxNodeChildrenByKind<&'a dyn Fn(SyntaxKind) -> bool>,
_p: PhantomData<L>,
}

impl<'a, L: Language> Iterator for SyntaxNodeChildrenByKind<'a, L> {
type Item = SyntaxNode<L>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(SyntaxNode::from)
}
}

impl<'a, L: Language> fmt::Debug for SyntaxNodeChildrenByKind<'a, L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SyntaxNodeChildrenByKind").finish()
}
}

#[derive(Debug, Clone)]
pub struct SyntaxElementChildren<L: Language> {
raw: cursor::SyntaxElementChildren,
Expand All @@ -401,6 +429,34 @@ impl<L: Language> Iterator for SyntaxElementChildren<L> {
}
}

impl<L: Language> SyntaxElementChildren<L> {
pub fn by_kind<'a>(
self,
matcher: &'a impl Fn(SyntaxKind) -> bool,
Veykril marked this conversation as resolved.
Show resolved Hide resolved
) -> SyntaxElementChildrenByKind<'a, L> {
SyntaxElementChildrenByKind { raw: self.raw.by_kind(matcher), _p: PhantomData }
}
}

#[derive(Clone)]
pub struct SyntaxElementChildrenByKind<'a, L: Language> {
raw: cursor::SyntaxElementChildrenByKind<&'a dyn Fn(SyntaxKind) -> bool>,
_p: PhantomData<L>,
}

impl<'a, L: Language> Iterator for SyntaxElementChildrenByKind<'a, L> {
type Item = SyntaxElement<L>;
fn next(&mut self) -> Option<Self::Item> {
self.raw.next().map(NodeOrToken::from)
}
}

impl<'a, L: Language> fmt::Debug for SyntaxElementChildrenByKind<'a, L> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SyntaxElementChildrenByKind").finish()
}
}

pub struct Preorder<L: Language> {
raw: cursor::Preorder,
_p: PhantomData<L>,
Expand Down
192 changes: 190 additions & 2 deletions src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,24 @@ impl NodeData {
})
})
}

fn next_sibling_by_kind(&self, matcher: &impl Fn(SyntaxKind) -> bool) -> Option<SyntaxNode> {
let mut siblings = self.green_siblings().enumerate();
let index = self.index() as usize;

siblings.nth(index);
Veykril marked this conversation as resolved.
Show resolved Hide resolved
siblings.find_map(|(index, child)| {
if !matcher(child.as_ref().kind()) {
return None;
}
child.as_ref().into_node().and_then(|green| {
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxNode::new_child(green, parent, index as u32, offset))
})
})
}

fn prev_sibling(&self) -> Option<SyntaxNode> {
let mut rev_siblings = self.green_siblings().enumerate().rev();
let index = rev_siblings.len() - (self.index() as usize);
Expand All @@ -412,6 +430,25 @@ impl NodeData {
Some(SyntaxElement::new(child.as_ref(), parent, index as u32, offset))
})
}

fn next_sibling_or_token_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxElement> {
let mut siblings = self.green_siblings().enumerate();
let index = self.index() as usize;

siblings.nth(index);
siblings.find_map(|(index, child)| {
if !matcher(child.as_ref().kind()) {
return None;
}
let parent = self.parent_node()?;
let offset = parent.offset() + child.rel_offset();
Some(SyntaxElement::new(child.as_ref(), parent, index as u32, offset))
})
}

fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
let mut siblings = self.green_siblings().enumerate();
let index = self.index().checked_sub(1)? as usize;
Expand Down Expand Up @@ -647,6 +684,23 @@ impl SyntaxNode {
})
})
}

pub fn first_child_by_kind(&self, matcher: &impl Fn(SyntaxKind) -> bool) -> Option<SyntaxNode> {
self.green_ref().children().raw.enumerate().find_map(|(index, child)| {
if !matcher(child.as_ref().kind()) {
return None;
}
child.as_ref().into_node().map(|green| {
SyntaxNode::new_child(
green,
self.clone(),
index as u32,
self.offset() + child.rel_offset(),
)
})
})
}

pub fn last_child(&self) -> Option<SyntaxNode> {
self.green_ref().children().raw.enumerate().rev().find_map(|(index, child)| {
child.as_ref().into_node().map(|green| {
Expand All @@ -665,6 +719,24 @@ impl SyntaxNode {
SyntaxElement::new(child.as_ref(), self.clone(), 0, self.offset() + child.rel_offset())
})
}

pub fn first_child_or_token_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxElement> {
self.green_ref().children().raw.enumerate().find_map(|(index, child)| {
if !matcher(child.as_ref().kind()) {
return None;
}
Some(SyntaxElement::new(
child.as_ref(),
self.clone(),
index as u32,
self.offset() + child.rel_offset(),
))
})
}

pub fn last_child_or_token(&self) -> Option<SyntaxElement> {
self.green_ref().children().raw.enumerate().next_back().map(|(index, child)| {
SyntaxElement::new(
Expand All @@ -679,13 +751,29 @@ impl SyntaxNode {
pub fn next_sibling(&self) -> Option<SyntaxNode> {
self.data().next_sibling()
}

pub fn next_sibling_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxNode> {
self.data().next_sibling_by_kind(matcher)
}

pub fn prev_sibling(&self) -> Option<SyntaxNode> {
self.data().prev_sibling()
}

pub fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().next_sibling_or_token()
}

pub fn next_sibling_or_token_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxElement> {
self.data().next_sibling_or_token_by_kind(matcher)
}

pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().prev_sibling_or_token()
}
Expand Down Expand Up @@ -910,6 +998,14 @@ impl SyntaxToken {
pub fn next_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().next_sibling_or_token()
}

pub fn next_sibling_or_token_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxElement> {
self.data().next_sibling_or_token_by_kind(matcher)
}

pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
self.data().prev_sibling_or_token()
}
Expand Down Expand Up @@ -1028,6 +1124,17 @@ impl SyntaxElement {
NodeOrToken::Token(it) => it.next_sibling_or_token(),
}
}

pub fn next_sibling_or_token_by_kind(
&self,
matcher: &impl Fn(SyntaxKind) -> bool,
) -> Option<SyntaxElement> {
match self {
NodeOrToken::Node(it) => it.next_sibling_or_token_by_kind(matcher),
NodeOrToken::Token(it) => it.next_sibling_or_token_by_kind(matcher),
}
}

pub fn prev_sibling_or_token(&self) -> Option<SyntaxElement> {
match self {
NodeOrToken::Node(it) => it.prev_sibling_or_token(),
Expand Down Expand Up @@ -1133,46 +1240,127 @@ impl From<SyntaxToken> for SyntaxElement {

#[derive(Clone, Debug)]
pub struct SyntaxNodeChildren {
parent: SyntaxNode,
next: Option<SyntaxNode>,
next_initialized: bool,
}

impl SyntaxNodeChildren {
fn new(parent: SyntaxNode) -> SyntaxNodeChildren {
SyntaxNodeChildren { next: parent.first_child() }
SyntaxNodeChildren { parent, next: None, next_initialized: false }
}

pub fn by_kind<F: Fn(SyntaxKind) -> bool>(self, matcher: F) -> SyntaxNodeChildrenByKind<F> {
if !self.next_initialized {
SyntaxNodeChildrenByKind { next: self.parent.first_child_by_kind(&matcher), matcher }
} else {
SyntaxNodeChildrenByKind {
next: self.next.and_then(|node| {
if matcher(node.kind()) {
Some(node)
} else {
node.next_sibling_by_kind(&matcher)
}
}),
matcher,
}
}
}
}

impl Iterator for SyntaxNodeChildren {
type Item = SyntaxNode;
fn next(&mut self) -> Option<SyntaxNode> {
if !self.next_initialized {
self.next = self.parent.first_child();
self.next_initialized = true;
}
self.next.take().map(|next| {
self.next = next.next_sibling();
next
})
}
}

#[derive(Clone, Debug)]
pub struct SyntaxNodeChildrenByKind<F: Fn(SyntaxKind) -> bool> {
next: Option<SyntaxNode>,
matcher: F,
}

impl<F: Fn(SyntaxKind) -> bool> Iterator for SyntaxNodeChildrenByKind<F> {
type Item = SyntaxNode;
fn next(&mut self) -> Option<SyntaxNode> {
self.next.take().map(|next| {
self.next = next.next_sibling_by_kind(&self.matcher);
next
})
}
}

#[derive(Clone, Debug)]
pub struct SyntaxElementChildren {
parent: SyntaxNode,
next: Option<SyntaxElement>,
next_initialized: bool,
Comment on lines +1299 to +1301
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, why do we need to change anything here? I'd expect SyntaxElementChildren to remain exactly as they were

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I changed this so we could avoid creating SyntaxElementChildren::next when calling children().by_kind. This actually makes a pretty significant impact - here's the DHAT output of the integrated completions benchmark without this optimization:

Screen Shot 2021-10-24 at 11 29 39 AM

And here's the DHAT output of the integrated completions benchmark after this optimization:

Screen Shot 2021-10-23 at 11 19 05 PM

98 million blocks allocated (before this optimization) vs 86 million blocks allocated (after this optimization).

Copy link
Member

@Veykril Veykril Nov 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds odd to me that using first_child_or_token_by_kind with these extra changes is so much faster than seeking to the next sibling via next_sibling_or_token_by_kind, as both ways come down to iterating a slice no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, my observation is that using first_child_or_token_by_kind allows us to jump to the first child/token node that matches a kind without needing to allocate any other nodes. Contrast this to using first_child_or_token followed by next_sibling_or_token_by_kind (if the first child doesn't match the kind), which can potentially allocate the first child before jumping to the desired node. I think its this allocation avoidance that explains the improvement in the DHAT outputs.

}

impl SyntaxElementChildren {
fn new(parent: SyntaxNode) -> SyntaxElementChildren {
SyntaxElementChildren { next: parent.first_child_or_token() }
SyntaxElementChildren { parent, next: None, next_initialized: false }
}

pub fn by_kind<F: Fn(SyntaxKind) -> bool>(self, matcher: F) -> SyntaxElementChildrenByKind<F> {
if !self.next_initialized {
SyntaxElementChildrenByKind {
next: self.parent.first_child_or_token_by_kind(&matcher),
matcher,
}
} else {
SyntaxElementChildrenByKind {
next: self.next.and_then(|node| {
if matcher(node.kind()) {
Some(node)
} else {
node.next_sibling_or_token_by_kind(&matcher)
}
}),
matcher,
}
}
}
}

impl Iterator for SyntaxElementChildren {
type Item = SyntaxElement;
fn next(&mut self) -> Option<SyntaxElement> {
if !self.next_initialized {
self.next = self.parent.first_child_or_token();
self.next_initialized = true;
}
self.next.take().map(|next| {
self.next = next.next_sibling_or_token();
next
})
}
}

#[derive(Clone, Debug)]
pub struct SyntaxElementChildrenByKind<F: Fn(SyntaxKind) -> bool> {
next: Option<SyntaxElement>,
matcher: F,
}

impl<F: Fn(SyntaxKind) -> bool> Iterator for SyntaxElementChildrenByKind<F> {
type Item = SyntaxElement;
fn next(&mut self) -> Option<SyntaxElement> {
self.next.take().map(|next| {
self.next = next.next_sibling_or_token_by_kind(&self.matcher);
next
})
}
}

pub struct Preorder {
start: SyntaxNode,
next: Option<WalkEvent<SyntaxNode>>,
Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ pub use text_size::{TextLen, TextRange, TextSize};

pub use crate::{
api::{
Language, SyntaxElement, SyntaxElementChildren, SyntaxNode, SyntaxNodeChildren, SyntaxToken,
Language, SyntaxElement, SyntaxElementChildren, SyntaxElementChildrenByKind, SyntaxNode,
SyntaxNodeChildren, SyntaxNodeChildrenByKind, SyntaxToken,
},
green::{
Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenNodeData, GreenToken,
Expand Down