From f8b48225e75640ce54957bc73b8657ffe8155f72 Mon Sep 17 00:00:00 2001 From: niklak Date: Thu, 19 Jan 2023 16:13:20 +0200 Subject: [PATCH 01/12] respectful update after "selectors" crate update --- Cargo.toml | 8 +++--- src/css.rs | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/element.rs | 26 +++++++----------- src/lib.rs | 1 + src/matcher.rs | 55 +++++++++++++++++++------------------- 5 files changed, 114 insertions(+), 48 deletions(-) create mode 100644 src/css.rs diff --git a/Cargo.toml b/Cargo.toml index 3d4bbaf..2addb88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,11 +13,11 @@ readme = "README.md" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -html5ever = "0.25.1" -selectors = "0.22.0" -cssparser = "0.27.2" +html5ever = "0.26.0" +selectors = "0.23.0" +cssparser = "0.28.1" tendril = "0.4.2" -markup5ever = "0.10.0" +markup5ever = "0.11.0" [dev-dependencies] diff --git a/src/css.rs b/src/css.rs new file mode 100644 index 0000000..91565b2 --- /dev/null +++ b/src/css.rs @@ -0,0 +1,72 @@ +use std::convert::AsRef; +use std::fmt; +use std::ops::Deref; + +use cssparser::{self, ToCss}; +use html5ever::LocalName; + +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct StringCSS(String); + +impl Deref for StringCSS { + type Target = String; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl AsRef for StringCSS { + fn as_ref(&self) -> &str { + return self.0.as_str(); + } +} + +impl From<&str> for StringCSS { + fn from(value: &str) -> Self { + let s = String::from(value); + return StringCSS(s); + } +} + +impl ToCss for StringCSS { + fn to_css(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + dest.write_str(self.0.as_str()) + } +} + +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct LocalNameCSS(LocalName); + +impl ToCss for LocalNameCSS { + fn to_css(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + dest.write_str(self.0.trim()) + } +} + +impl From<&str> for LocalNameCSS { + fn from(value: &str) -> Self { + let s = LocalName::from(value); + return LocalNameCSS(s); + } +} + +impl Deref for LocalNameCSS { + type Target = LocalName; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Default for LocalNameCSS { + fn default() -> Self { + Self(Default::default()) + } +} diff --git a/src/element.rs b/src/element.rs index 2390457..0eefb90 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,5 +1,9 @@ +use crate::css::LocalNameCSS; use crate::dom_tree::{Node, NodeData}; use crate::matcher::InnerSelector; + +use std::ops::Deref; + use markup5ever::{namespace_url, ns}; use selectors::attr::AttrSelectorOperation; use selectors::attr::CaseSensitivity; @@ -8,7 +12,6 @@ use selectors::context::MatchingContext; use selectors::matching::ElementSelectorFlags; use selectors::parser::SelectorImpl; use selectors::OpaqueElement; -use std::ops::Deref; impl<'a> selectors::Element for Node<'a> { type Impl = InnerSelector; @@ -60,7 +63,7 @@ impl<'a> selectors::Element for Node<'a> { fn has_local_name(&self, local_name: &::BorrowedLocalName) -> bool { self.query(|node| { if let NodeData::Element(ref e) = node.data { - return &e.name.local == local_name; + return &e.name.local == local_name.deref(); } false @@ -102,7 +105,7 @@ impl<'a> selectors::Element for Node<'a> { if let NodeData::Element(ref e) = node.data { return e.attrs.iter().any(|attr| match *ns { NamespaceConstraint::Specific(url) if *url != attr.name.ns => false, - _ => *local_name == attr.name.local && operation.eval_str(&attr.value), + _ => *local_name.as_ref() == attr.name.local && operation.eval_str(&attr.value), }); } @@ -165,7 +168,7 @@ impl<'a> selectors::Element for Node<'a> { fn has_class( &self, - name: &::ClassName, + name: &::LocalName, case_sensitivity: CaseSensitivity, ) -> bool { self.query(|node| { @@ -183,23 +186,12 @@ impl<'a> selectors::Element for Node<'a> { }) } - // Returns the mapping from the `exportparts` attribute in the regular direction, that is, inner-tree->outer-tree. - fn exported_part( - &self, - _name: &::PartName, - ) -> Option<::PartName> { - None - } - // Returns the mapping from the `exportparts` attribute in the regular direction, that is, outer-tree->inner-tree. - fn imported_part( - &self, - _name: &::PartName, - ) -> Option<::PartName> { + fn imported_part(&self, _name: &LocalNameCSS) -> Option { None } - fn is_part(&self, _name: &::PartName) -> bool { + fn is_part(&self, _name: &LocalNameCSS) -> bool { false } diff --git a/src/lib.rs b/src/lib.rs index 258fd5e..fd95381 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ //! // #![deny(missing_docs)] // TODO: add this back in. +mod css; mod document; mod dom_tree; mod element; diff --git a/src/matcher.rs b/src/matcher.rs index d3b3f34..592177d 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,6 +1,9 @@ +use crate::css::{LocalNameCSS, StringCSS}; use crate::dom_tree::{NodeData, NodeId, NodeRef}; + use cssparser::ParseError; -use html5ever::{LocalName, Namespace}; +use cssparser::{self, ToCss}; +use html5ever::Namespace; use selectors::matching; use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; use selectors::visitor; @@ -134,14 +137,12 @@ pub struct InnerSelector; impl parser::SelectorImpl for InnerSelector { type ExtraMatchingData = String; - type AttrValue = String; - type Identifier = LocalName; - type ClassName = LocalName; - type PartName = LocalName; - type LocalName = LocalName; + type AttrValue = StringCSS; + type Identifier = LocalNameCSS; + type LocalName = LocalNameCSS; type NamespaceUrl = Namespace; - type NamespacePrefix = LocalName; - type BorrowedLocalName = LocalName; + type NamespacePrefix = LocalNameCSS; + type BorrowedLocalName = LocalNameCSS; type BorrowedNamespaceUrl = Namespace; type NonTSPseudoClass = NonTSPseudoClass; @@ -151,6 +152,15 @@ impl parser::SelectorImpl for InnerSelector { #[derive(Clone, Eq, PartialEq)] pub struct NonTSPseudoClass; +impl ToCss for NonTSPseudoClass { + fn to_css(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + dest.write_str("") + } +} + impl parser::NonTSPseudoClass for NonTSPseudoClass { type Impl = InnerSelector; @@ -162,14 +172,6 @@ impl parser::NonTSPseudoClass for NonTSPseudoClass { false } - fn has_zero_specificity(&self) -> bool { - false - } -} - -impl parser::Visit for NonTSPseudoClass { - type Impl = InnerSelector; - fn visit(&self, _visitor: &mut V) -> bool where V: visitor::SelectorVisitor, @@ -178,7 +180,10 @@ impl parser::Visit for NonTSPseudoClass { } } -impl cssparser::ToCss for NonTSPseudoClass { +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct PseudoElement; + +impl ToCss for PseudoElement { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, @@ -187,18 +192,14 @@ impl cssparser::ToCss for NonTSPseudoClass { } } -#[derive(Clone, Eq, PartialEq)] -pub struct PseudoElement; - impl parser::PseudoElement for PseudoElement { type Impl = InnerSelector; -} -impl cssparser::ToCss for PseudoElement { - fn to_css(&self, dest: &mut W) -> fmt::Result - where - W: fmt::Write, - { - dest.write_str("") + fn accepts_state_pseudo_classes(&self) -> bool { + false + } + + fn valid_after_slotted(&self) -> bool { + false } } From 03798f3ee8145c9212b0300a7ad5ddce2c70b523 Mon Sep 17 00:00:00 2001 From: niklak Date: Fri, 20 Jan 2023 14:27:09 +0200 Subject: [PATCH 02/12] add support for PseudoClasses --- Cargo.toml | 4 +-- src/css.rs | 40 +++++++++++-------------- src/element.rs | 28 +++++++++++++---- src/lib.rs | 5 ++++ src/matcher.rs | 81 +++++++++++++++++++++++++++++++++++++++++++------- 5 files changed, 117 insertions(+), 41 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2addb88..faecd3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nipper" -version = "0.1.9" +version = "0.1.10" description = "HTML manipulation with CSS seletors" license = "MIT/Apache-2.0" repository = "https://github.com/importcjj/nipper" @@ -18,7 +18,7 @@ selectors = "0.23.0" cssparser = "0.28.1" tendril = "0.4.2" markup5ever = "0.11.0" - +matches = "0.1.4" [dev-dependencies] reqwest = { version = "0.11.3", features = ["blocking"] } diff --git a/src/css.rs b/src/css.rs index 91565b2..5ba9a9e 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,13 +2,13 @@ use std::convert::AsRef; use std::fmt; use std::ops::Deref; -use cssparser::{self, ToCss}; +use cssparser::{self, ToCss, serialize_string}; use html5ever::LocalName; #[derive(Clone, Eq, PartialEq, Debug)] -pub struct StringCSS(String); +pub struct CssString(String); -impl Deref for StringCSS { +impl Deref for CssString { type Target = String; fn deref(&self) -> &Self::Target { @@ -16,48 +16,47 @@ impl Deref for StringCSS { } } -impl AsRef for StringCSS { +impl AsRef for CssString { fn as_ref(&self) -> &str { - return self.0.as_str(); + return &self.0; } } -impl From<&str> for StringCSS { +impl From<&str> for CssString { fn from(value: &str) -> Self { - let s = String::from(value); - return StringCSS(s); + return CssString(value.to_owned()); } } -impl ToCss for StringCSS { +impl ToCss for CssString { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, { - dest.write_str(self.0.as_str()) + //dest.write_str(&self.0) + cssparser::serialize_string(&self.0, dest) } } -#[derive(Clone, Eq, PartialEq, Debug)] -pub struct LocalNameCSS(LocalName); +#[derive(Clone, Eq, PartialEq, Debug, Default)] +pub struct CssLocalName(LocalName); -impl ToCss for LocalNameCSS { +impl ToCss for CssLocalName { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, { - dest.write_str(self.0.trim()) + dest.write_str(&self.0) } } -impl From<&str> for LocalNameCSS { +impl From<&str> for CssLocalName { fn from(value: &str) -> Self { - let s = LocalName::from(value); - return LocalNameCSS(s); + return CssLocalName(value.into()); } } -impl Deref for LocalNameCSS { +impl Deref for CssLocalName { type Target = LocalName; fn deref(&self) -> &Self::Target { @@ -65,8 +64,3 @@ impl Deref for LocalNameCSS { } } -impl Default for LocalNameCSS { - fn default() -> Self { - Self(Default::default()) - } -} diff --git a/src/element.rs b/src/element.rs index 0eefb90..b00d26b 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,6 +1,6 @@ -use crate::css::LocalNameCSS; +use crate::css::CssLocalName; use crate::dom_tree::{Node, NodeData}; -use crate::matcher::InnerSelector; +use crate::matcher::{InnerSelector, NonTSPseudoClass}; use std::ops::Deref; @@ -115,14 +115,30 @@ impl<'a> selectors::Element for Node<'a> { fn match_non_ts_pseudo_class( &self, - _pc: &::NonTSPseudoClass, + pseudo: &::NonTSPseudoClass, _context: &mut MatchingContext, _flags_setter: &mut F, ) -> bool where F: FnMut(&Self, ElementSelectorFlags), { - false + + use self::NonTSPseudoClass::*; + match *pseudo { + Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => { + false + } + AnyLink | Link => { + match self.node_name() { + Some(node_name) => { + matches!(node_name.deref(),"a" | "area" | "link") + && self.attr("href").is_some() + }, + None => false, + } + + } + } } fn match_pseudo_element( @@ -187,11 +203,11 @@ impl<'a> selectors::Element for Node<'a> { } // Returns the mapping from the `exportparts` attribute in the regular direction, that is, outer-tree->inner-tree. - fn imported_part(&self, _name: &LocalNameCSS) -> Option { + fn imported_part(&self, _name: &CssLocalName) -> Option { None } - fn is_part(&self, _name: &LocalNameCSS) -> bool { + fn is_part(&self, _name: &CssLocalName) -> bool { false } diff --git a/src/lib.rs b/src/lib.rs index fd95381..33937a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,11 @@ //! // #![deny(missing_docs)] // TODO: add this back in. +#[macro_use] +extern crate html5ever; +#[macro_use] +extern crate matches; + mod css; mod document; mod dom_tree; diff --git a/src/matcher.rs b/src/matcher.rs index 592177d..366a1f9 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,8 +1,8 @@ -use crate::css::{LocalNameCSS, StringCSS}; +use crate::css::{CssLocalName, CssString}; use crate::dom_tree::{NodeData, NodeId, NodeRef}; use cssparser::ParseError; -use cssparser::{self, ToCss}; +use cssparser::{self, ToCss, SourceLocation, CowRcStr}; use html5ever::Namespace; use selectors::matching; use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; @@ -130,6 +130,43 @@ pub(crate) struct InnerSelectorParser; impl<'i> parser::Parser<'i> for InnerSelectorParser { type Impl = InnerSelector; type Error = parser::SelectorParseErrorKind<'i>; + + fn parse_non_ts_pseudo_class( + &self, + location: SourceLocation, + name: CowRcStr<'i>, + ) -> Result> { + + use self::NonTSPseudoClass::*; + if name.eq_ignore_ascii_case("any-link") { + Ok(AnyLink) + } else if name.eq_ignore_ascii_case("link") { + Ok(Link) + } else if name.eq_ignore_ascii_case("visited") { + Ok(Visited) + } else if name.eq_ignore_ascii_case("active") { + Ok(Active) + } else if name.eq_ignore_ascii_case("focus") { + Ok(Focus) + } else if name.eq_ignore_ascii_case("hover") { + Ok(Hover) + } else if name.eq_ignore_ascii_case("enabled") { + Ok(Enabled) + } else if name.eq_ignore_ascii_case("disabled") { + Ok(Disabled) + } else if name.eq_ignore_ascii_case("checked") { + Ok(Checked) + } else if name.eq_ignore_ascii_case("indeterminate") { + Ok(Indeterminate) + } else { + Err( + location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + } } #[derive(Debug, Clone)] @@ -137,27 +174,51 @@ pub struct InnerSelector; impl parser::SelectorImpl for InnerSelector { type ExtraMatchingData = String; - type AttrValue = StringCSS; - type Identifier = LocalNameCSS; - type LocalName = LocalNameCSS; + type AttrValue = CssString; + type Identifier = CssLocalName; + type LocalName = CssLocalName; type NamespaceUrl = Namespace; - type NamespacePrefix = LocalNameCSS; - type BorrowedLocalName = LocalNameCSS; + type NamespacePrefix = CssLocalName; + type BorrowedLocalName = CssLocalName; type BorrowedNamespaceUrl = Namespace; type NonTSPseudoClass = NonTSPseudoClass; type PseudoElement = PseudoElement; } -#[derive(Clone, Eq, PartialEq)] -pub struct NonTSPseudoClass; + +#[derive(PartialEq, Eq, Clone, Debug)] +pub enum NonTSPseudoClass { + AnyLink, + Link, + Visited, + Active, + Focus, + Hover, + Enabled, + Disabled, + Checked, + Indeterminate, +} + impl ToCss for NonTSPseudoClass { fn to_css(&self, dest: &mut W) -> fmt::Result where W: fmt::Write, { - dest.write_str("") + dest.write_str(match *self { + NonTSPseudoClass::AnyLink => ":any-link", + NonTSPseudoClass::Link => ":link", + NonTSPseudoClass::Visited => ":visited", + NonTSPseudoClass::Active => ":active", + NonTSPseudoClass::Focus => ":focus", + NonTSPseudoClass::Hover => ":hover", + NonTSPseudoClass::Enabled => ":enabled", + NonTSPseudoClass::Disabled => ":disabled", + NonTSPseudoClass::Checked => ":checked", + NonTSPseudoClass::Indeterminate => ":indeterminate", + }) } } From 7df4f41a82ce55971fb0dcb960a3dfa083ca1ad8 Mon Sep 17 00:00:00 2001 From: niklak Date: Mon, 23 Jan 2023 17:31:29 +0200 Subject: [PATCH 03/12] add support for ":has()" pseudo-class --- src/element.rs | 34 +++++++++++++++++++++----- src/matcher.rs | 65 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 77 insertions(+), 22 deletions(-) diff --git a/src/element.rs b/src/element.rs index b00d26b..656716f 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,17 +1,18 @@ use crate::css::CssLocalName; -use crate::dom_tree::{Node, NodeData}; +use crate::dom_tree::{Node, NodeData, NodeRef}; use crate::matcher::{InnerSelector, NonTSPseudoClass}; use std::ops::Deref; +use cssparser::ToCss; use markup5ever::{namespace_url, ns}; use selectors::attr::AttrSelectorOperation; use selectors::attr::CaseSensitivity; use selectors::attr::NamespaceConstraint; use selectors::context::MatchingContext; -use selectors::matching::ElementSelectorFlags; +use selectors::matching::{ElementSelectorFlags, matches_selector_list}; use selectors::parser::SelectorImpl; -use selectors::OpaqueElement; +use selectors::{OpaqueElement, SelectorList, Element}; impl<'a> selectors::Element for Node<'a> { type Impl = InnerSelector; @@ -116,7 +117,7 @@ impl<'a> selectors::Element for Node<'a> { fn match_non_ts_pseudo_class( &self, pseudo: &::NonTSPseudoClass, - _context: &mut MatchingContext, + context: &mut MatchingContext, _flags_setter: &mut F, ) -> bool where @@ -124,7 +125,7 @@ impl<'a> selectors::Element for Node<'a> { { use self::NonTSPseudoClass::*; - match *pseudo { + match pseudo { Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => { false } @@ -136,7 +137,13 @@ impl<'a> selectors::Element for Node<'a> { }, None => false, } - + + }, + Has(list) => { + //it checks only in self, not in inlines! + has_descendant_match(self, list, context) + + //true } } } @@ -224,3 +231,18 @@ impl<'a> selectors::Element for Node<'a> { self.is_document() } } + + +fn has_descendant_match(n: &NodeRef, selectors_list: &Box>, ctx: &mut MatchingContext) -> bool { + let mut node = n.first_child(); + while let Some(ref n) = node { + + if matches_selector_list(&selectors_list, n, ctx) { + return true; + } else if n.is_element() && has_descendant_match(n, selectors_list, ctx) { + return true; + } + node = n.next_sibling(); + } + false +} \ No newline at end of file diff --git a/src/matcher.rs b/src/matcher.rs index 366a1f9..41721df 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,11 +1,13 @@ use crate::css::{CssLocalName, CssString}; use crate::dom_tree::{NodeData, NodeId, NodeRef}; +use std::convert::Into; + use cssparser::ParseError; use cssparser::{self, ToCss, SourceLocation, CowRcStr}; use html5ever::Namespace; -use selectors::matching; -use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; +use selectors::{matching, SelectorImpl}; +use selectors::parser::{self, SelectorList, SelectorParseErrorKind, Selector}; use selectors::visitor; use selectors::Element; use std::collections::HashSet; @@ -158,7 +160,7 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { Ok(Checked) } else if name.eq_ignore_ascii_case("indeterminate") { Ok(Indeterminate) - } else { + }else { Err( location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( name, @@ -167,9 +169,32 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { } } + fn parse_non_ts_functional_pseudo_class<'t>( + &self, + name: CowRcStr<'i>, + arguments: &mut cssparser::Parser<'i, 't>, + ) -> Result> { + + if name.starts_with("has") { + + let list:SelectorList = SelectorList::parse( + self, + arguments, + )?; + Ok(NonTSPseudoClass::Has(Box::new(list))) + }else { + Err( + arguments.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + } + } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct InnerSelector; impl parser::SelectorImpl for InnerSelector { @@ -199,6 +224,7 @@ pub enum NonTSPseudoClass { Disabled, Checked, Indeterminate, + Has(Box>), } @@ -207,18 +233,25 @@ impl ToCss for NonTSPseudoClass { where W: fmt::Write, { - dest.write_str(match *self { - NonTSPseudoClass::AnyLink => ":any-link", - NonTSPseudoClass::Link => ":link", - NonTSPseudoClass::Visited => ":visited", - NonTSPseudoClass::Active => ":active", - NonTSPseudoClass::Focus => ":focus", - NonTSPseudoClass::Hover => ":hover", - NonTSPseudoClass::Enabled => ":enabled", - NonTSPseudoClass::Disabled => ":disabled", - NonTSPseudoClass::Checked => ":checked", - NonTSPseudoClass::Indeterminate => ":indeterminate", - }) + println!("{:?}", self); + match self { + NonTSPseudoClass::AnyLink => dest.write_str(":any-link"), + NonTSPseudoClass::Link => dest.write_str(":link"), + NonTSPseudoClass::Visited => dest.write_str(":visited"), + NonTSPseudoClass::Active => dest.write_str(":active"), + NonTSPseudoClass::Focus => dest.write_str(":focus"), + NonTSPseudoClass::Hover => dest.write_str(":hover"), + NonTSPseudoClass::Enabled => dest.write_str(":enabled"), + NonTSPseudoClass::Disabled => dest.write_str(":disabled"), + NonTSPseudoClass::Checked => dest.write_str(":checked"), + NonTSPseudoClass::Indeterminate => dest.write_str(":indeterminate"), + NonTSPseudoClass::Has(list) => { + println!("{}", list.to_css_string()); + dest.write_str("has:(")?; + list.to_css(dest)?; + dest.write_str(")") + } + } } } From fafd982e3a86ee23af7107447f96e280a3fddceb Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 15:32:54 +0200 Subject: [PATCH 04/12] minor adjustments --- src/css.rs | 3 +-- src/element.rs | 35 ++++++++++++++++------------------- src/matcher.rs | 35 +++++++++++------------------------ 3 files changed, 28 insertions(+), 45 deletions(-) diff --git a/src/css.rs b/src/css.rs index 5ba9a9e..754cfd1 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,7 +2,7 @@ use std::convert::AsRef; use std::fmt; use std::ops::Deref; -use cssparser::{self, ToCss, serialize_string}; +use cssparser::{self, serialize_string, ToCss}; use html5ever::LocalName; #[derive(Clone, Eq, PartialEq, Debug)] @@ -63,4 +63,3 @@ impl Deref for CssLocalName { &self.0 } } - diff --git a/src/element.rs b/src/element.rs index 656716f..ddcd871 100644 --- a/src/element.rs +++ b/src/element.rs @@ -10,9 +10,9 @@ use selectors::attr::AttrSelectorOperation; use selectors::attr::CaseSensitivity; use selectors::attr::NamespaceConstraint; use selectors::context::MatchingContext; -use selectors::matching::{ElementSelectorFlags, matches_selector_list}; +use selectors::matching::{matches_selector_list, ElementSelectorFlags}; use selectors::parser::SelectorImpl; -use selectors::{OpaqueElement, SelectorList, Element}; +use selectors::{Element, OpaqueElement, SelectorList}; impl<'a> selectors::Element for Node<'a> { type Impl = InnerSelector; @@ -123,26 +123,22 @@ impl<'a> selectors::Element for Node<'a> { where F: FnMut(&Self, ElementSelectorFlags), { - use self::NonTSPseudoClass::*; match pseudo { Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => { false } - AnyLink | Link => { - match self.node_name() { - Some(node_name) => { - matches!(node_name.deref(),"a" | "area" | "link") + AnyLink | Link => match self.node_name() { + Some(node_name) => { + matches!(node_name.deref(), "a" | "area" | "link") && self.attr("href").is_some() - }, - None => false, } - + None => false, }, Has(list) => { //it checks only in self, not in inlines! has_descendant_match(self, list, context) - + //true } } @@ -232,17 +228,18 @@ impl<'a> selectors::Element for Node<'a> { } } - -fn has_descendant_match(n: &NodeRef, selectors_list: &Box>, ctx: &mut MatchingContext) -> bool { - let mut node = n.first_child(); +fn has_descendant_match( + n: &NodeRef, + selectors_list: &SelectorList, + ctx: &mut MatchingContext, +) -> bool { + let mut node = n.first_child(); while let Some(ref n) = node { - - if matches_selector_list(&selectors_list, n, ctx) { - return true; - } else if n.is_element() && has_descendant_match(n, selectors_list, ctx) { + if matches_selector_list(&selectors_list, n, ctx) + || (n.is_element() && has_descendant_match(n, selectors_list, ctx)) { return true; } node = n.next_sibling(); } false -} \ No newline at end of file +} diff --git a/src/matcher.rs b/src/matcher.rs index 41721df..f7e940c 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -4,12 +4,12 @@ use crate::dom_tree::{NodeData, NodeId, NodeRef}; use std::convert::Into; use cssparser::ParseError; -use cssparser::{self, ToCss, SourceLocation, CowRcStr}; +use cssparser::{self, CowRcStr, SourceLocation, ToCss}; use html5ever::Namespace; -use selectors::{matching, SelectorImpl}; -use selectors::parser::{self, SelectorList, SelectorParseErrorKind, Selector}; +use selectors::parser::{self, Selector, SelectorList, SelectorParseErrorKind}; use selectors::visitor; use selectors::Element; +use selectors::{matching, SelectorImpl}; use std::collections::HashSet; use std::fmt; @@ -138,7 +138,6 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { location: SourceLocation, name: CowRcStr<'i>, ) -> Result> { - use self::NonTSPseudoClass::*; if name.eq_ignore_ascii_case("any-link") { Ok(AnyLink) @@ -160,38 +159,28 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { Ok(Checked) } else if name.eq_ignore_ascii_case("indeterminate") { Ok(Indeterminate) - }else { + } else { Err( location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( name, )), ) } - } fn parse_non_ts_functional_pseudo_class<'t>( &self, name: CowRcStr<'i>, arguments: &mut cssparser::Parser<'i, 't>, ) -> Result> { - if name.starts_with("has") { - - let list:SelectorList = SelectorList::parse( - self, - arguments, - )?; - Ok(NonTSPseudoClass::Has(Box::new(list))) - }else { - Err( - arguments.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( - name, - )), - ) + let list: SelectorList = SelectorList::parse(self, arguments)?; + Ok(NonTSPseudoClass::Has(list)) + } else { + Err(arguments.new_custom_error( + SelectorParseErrorKind::UnsupportedPseudoClassOrElement(name), + )) } - } - } #[derive(Debug, Clone, PartialEq, Eq)] @@ -211,7 +200,6 @@ impl parser::SelectorImpl for InnerSelector { type PseudoElement = PseudoElement; } - #[derive(PartialEq, Eq, Clone, Debug)] pub enum NonTSPseudoClass { AnyLink, @@ -224,10 +212,9 @@ pub enum NonTSPseudoClass { Disabled, Checked, Indeterminate, - Has(Box>), + Has(SelectorList), } - impl ToCss for NonTSPseudoClass { fn to_css(&self, dest: &mut W) -> fmt::Result where From 93e0b6a62e76add272372bae6a1aafb9b2ab875e Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 16:20:03 +0200 Subject: [PATCH 05/12] run clippy --- src/css.rs | 8 ++++---- src/element.rs | 3 +-- src/matcher.rs | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/css.rs b/src/css.rs index 754cfd1..5a55d19 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,7 +2,7 @@ use std::convert::AsRef; use std::fmt; use std::ops::Deref; -use cssparser::{self, serialize_string, ToCss}; +use cssparser::{self, ToCss}; use html5ever::LocalName; #[derive(Clone, Eq, PartialEq, Debug)] @@ -18,13 +18,13 @@ impl Deref for CssString { impl AsRef for CssString { fn as_ref(&self) -> &str { - return &self.0; + &self.0 } } impl From<&str> for CssString { fn from(value: &str) -> Self { - return CssString(value.to_owned()); + CssString(value.to_owned() } } @@ -52,7 +52,7 @@ impl ToCss for CssLocalName { impl From<&str> for CssLocalName { fn from(value: &str) -> Self { - return CssLocalName(value.into()); + CssLocalName(value.into()) } } diff --git a/src/element.rs b/src/element.rs index ddcd871..08258bd 100644 --- a/src/element.rs +++ b/src/element.rs @@ -4,7 +4,6 @@ use crate::matcher::{InnerSelector, NonTSPseudoClass}; use std::ops::Deref; -use cssparser::ToCss; use markup5ever::{namespace_url, ns}; use selectors::attr::AttrSelectorOperation; use selectors::attr::CaseSensitivity; @@ -12,7 +11,7 @@ use selectors::attr::NamespaceConstraint; use selectors::context::MatchingContext; use selectors::matching::{matches_selector_list, ElementSelectorFlags}; use selectors::parser::SelectorImpl; -use selectors::{Element, OpaqueElement, SelectorList}; +use selectors::{ OpaqueElement, SelectorList}; impl<'a> selectors::Element for Node<'a> { type Impl = InnerSelector; diff --git a/src/matcher.rs b/src/matcher.rs index f7e940c..e56b90e 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -6,10 +6,10 @@ use std::convert::Into; use cssparser::ParseError; use cssparser::{self, CowRcStr, SourceLocation, ToCss}; use html5ever::Namespace; -use selectors::parser::{self, Selector, SelectorList, SelectorParseErrorKind}; +use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; use selectors::visitor; use selectors::Element; -use selectors::{matching, SelectorImpl}; +use selectors::{matching}; use std::collections::HashSet; use std::fmt; From dda0402e5c7e2a2394050da0c920f6544138dc42 Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 16:40:51 +0200 Subject: [PATCH 06/12] fix 'impl From<&str> for CssString' --- src/css.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/css.rs b/src/css.rs index 5a55d19..82cffab 100644 --- a/src/css.rs +++ b/src/css.rs @@ -24,7 +24,7 @@ impl AsRef for CssString { impl From<&str> for CssString { fn from(value: &str) -> Self { - CssString(value.to_owned() + CssString(value.to_owned()) } } From f1532b3ed28588c44109f3aff91f2b5fe9eb6d16 Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 18:22:32 +0200 Subject: [PATCH 07/12] changes with respect to clippy check --- Cargo.toml | 1 - src/css.rs | 2 +- src/document.rs | 10 +++--- src/dom_tree.rs | 80 +++++++++++++++++++-------------------------- src/element.rs | 7 ++-- src/lib.rs | 3 -- src/manipulation.rs | 8 ++--- src/matcher.rs | 8 ++--- src/query.rs | 2 +- src/selection.rs | 8 +---- src/traversal.rs | 8 ++--- 11 files changed, 54 insertions(+), 83 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index faecd3c..46b6050 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,6 @@ selectors = "0.23.0" cssparser = "0.28.1" tendril = "0.4.2" markup5ever = "0.11.0" -matches = "0.1.4" [dev-dependencies] reqwest = { version = "0.11.3", features = ["blocking"] } diff --git a/src/css.rs b/src/css.rs index 82cffab..8b1fa67 100644 --- a/src/css.rs +++ b/src/css.rs @@ -18,7 +18,7 @@ impl Deref for CssString { impl AsRef for CssString { fn as_ref(&self) -> &str { - &self.0 + &self.0 } } diff --git a/src/document.rs b/src/document.rs index cdd34ea..c2103d8 100644 --- a/src/document.rs +++ b/src/document.rs @@ -92,7 +92,7 @@ impl TreeSink for Document { NodeData::Element(Element { template_contents: Some(ref contents), .. - }) => contents.clone(), + }) => *contents, _ => panic!("not a template element!"), }) } @@ -151,7 +151,7 @@ impl TreeSink for Document { // Create a Processing Instruction node. fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeId { self.tree.create_node(NodeData::ProcessingInstruction { - target: target, + target, contents: data, }) } @@ -243,9 +243,9 @@ impl TreeSink for Document { self.tree.append_child_data_of( &root, NodeData::Doctype { - name: name, - public_id: public_id, - system_id: system_id, + name, + public_id, + system_id, }, ); } diff --git a/src/dom_tree.rs b/src/dom_tree.rs index 9969053..2540a7b 100644 --- a/src/dom_tree.rs +++ b/src/dom_tree.rs @@ -265,7 +265,7 @@ impl Tree { with_cell_mut!(self.nodes, nodes, { let mut new_nodes = tree.nodes.into_inner(); assert!( - new_nodes.len() > 0, + !new_nodes.is_empty(), "The tree should have at leaset one root node" ); assert!( @@ -317,7 +317,7 @@ impl Tree { node.parent = node.parent.and_then(|parent_id| match parent_id.value { i if i < TRUE_ROOT_ID => None, i if i == TRUE_ROOT_ID => Some(*id), - i @ _ => fix_id!(Some(NodeId::new(i))), + i => fix_id!(Some(NodeId::new(i))), }); // Update prev_sibling_id @@ -343,7 +343,7 @@ impl Tree { with_cell_mut!(self.nodes, nodes, { let mut new_nodes = tree.nodes.into_inner(); assert!( - new_nodes.len() > 0, + !new_nodes.is_empty(), "The tree should have at leaset one root node" ); assert!( @@ -388,17 +388,16 @@ impl Tree { parent.first_child = first_child_id; } - let mut i = 0; let mut last_valid_child = 0; let mut first_valid_child = true; // Fix nodes's ref id. - for node in new_nodes.iter_mut() { + for (i, node) in new_nodes.iter_mut().enumerate() { node.parent = node .parent .and_then(|old_parent_id| match old_parent_id.value { i if i < TRUE_ROOT_ID => None, i if i == TRUE_ROOT_ID => parent_id, - i @ _ => fix_id!(Some(NodeId::new(i))), + i => fix_id!(Some(NodeId::new(i))), }); // Update first child's prev_sibling @@ -416,11 +415,10 @@ impl Tree { node.last_child = fix_id!(node.last_child); node.prev_sibling = fix_id!(node.prev_sibling); node.next_sibling = fix_id!(node.next_sibling); - i += 1; } // Update last child's next_sibling. - new_nodes[last_valid_child as usize].next_sibling = Some(*id); + new_nodes[last_valid_child].next_sibling = Some(*id); // Put all the new nodes except the root node into the nodes. nodes.extend(new_nodes); @@ -557,9 +555,8 @@ impl Tree { let node_a = unsafe { nodes.get_unchecked(a.value) }; let node_b = unsafe { nodes.get_unchecked(b.value) }; - let r = f(node_a, node_b); // self.nodes.set(nodes); - r + f(node_a, node_b) } } @@ -603,36 +600,27 @@ impl Debug for InnerNode { impl InnerNode { pub fn is_document(&self) -> bool { - match self.data { - NodeData::Document => true, - _ => false, - } + matches!(self.data, NodeData::Document) } pub fn is_element(&self) -> bool { - match self.data { - NodeData::Element(_) => true, - _ => false, - } + matches!(self.data, NodeData::Element(_)) } pub fn is_text(&self) -> bool { - match self.data { - NodeData::Text { .. } => true, - _ => false, - } + matches!(self.data, NodeData::Text { .. }) } } impl Clone for InnerNode { fn clone(&self) -> Self { Self { - id: self.id.clone(), - parent: self.parent.clone(), - prev_sibling: self.prev_sibling.clone(), - next_sibling: self.next_sibling.clone(), - first_child: self.first_child.clone(), - last_child: self.last_child.clone(), + id: self.id, + parent: self.parent, + prev_sibling: self.prev_sibling, + next_sibling: self.next_sibling, + first_child: self.first_child, + last_child: self.last_child, data: self.data.clone(), } } @@ -775,18 +763,18 @@ impl<'a> Node<'a> { } pub fn add_class(&self, class: &str) { - if class.trim().len() == 0 { + if class.trim().is_empty() { return; } - self.update(|node| match node.data { - NodeData::Element(ref mut e) => { + self.update(|node| { + if let NodeData::Element(ref mut e) = node.data { let mut attr = e.attrs.iter_mut().find(|attr| &attr.name.local == "class"); let set: HashSet = class - .split(" ") + .split(' ') .map(|s| s.trim()) - .filter(|s| s.len() > 0) + .filter(|s| !s.is_empty()) .map(|s| s.to_string()) .collect(); @@ -807,29 +795,28 @@ impl<'a> Node<'a> { e.attrs.push(Attribute { name, value }) } } - _ => (), }) } pub fn remove_class(&self, class: &str) { - if class.trim().len() == 0 { + if class.trim().is_empty() { return; } - self.update(|node| match node.data { - NodeData::Element(ref mut e) => { + self.update(|node| { + if let NodeData::Element(ref mut e) = node.data { e.attrs .iter_mut() .find(|attr| &attr.name.local == "class") .map(|attr| { let mut set: HashSet<&str> = attr .value - .split(" ") + .split(' ') .map(|s| s.trim()) - .filter(|s| s.len() > 0) + .filter(|s| !s.is_empty()) .collect(); - let removes = class.split(" ").map(|s| s.trim()).filter(|s| s.len() > 0); + let removes = class.split(' ').map(|s| s.trim()).filter(|s| !s.is_empty()); for remove in removes { set.remove(remove); @@ -839,7 +826,6 @@ impl<'a> Node<'a> { StrTendril::from(set.into_iter().collect::>().join(" ")); }); } - _ => (), }) } @@ -856,7 +842,7 @@ impl<'a> Node<'a> { pub fn attrs(&self) -> Vec { self.query(|node| match node.data { - NodeData::Element(ref e) => e.attrs.iter().map(|attr| attr.clone()).collect(), + NodeData::Element(ref e) => e.attrs.to_vec(), _ => vec![], }) } @@ -939,7 +925,7 @@ impl<'a> Node<'a> { } } - NodeData::Text { ref contents } => text.push_tendril(&contents), + NodeData::Text { ref contents } => text.push_tendril(contents), _ => continue, } @@ -1040,7 +1026,7 @@ impl<'a> Serialize for SerializableNodeRef<'a> { IncludeNode => vec![SerializeOp::Open(id)], ChildrenOnly(_) => children_of!(nodes, id) .into_iter() - .map(|h| SerializeOp::Open(h)) + .map(SerializeOp::Open) .collect(), }; @@ -1061,9 +1047,9 @@ impl<'a> Serialize for SerializableNodeRef<'a> { Ok(()) } - NodeData::Doctype { ref name, .. } => serializer.write_doctype(&name), - NodeData::Text { ref contents } => serializer.write_text(&contents), - NodeData::Comment { ref contents } => serializer.write_comment(&contents), + NodeData::Doctype { ref name, .. } => serializer.write_doctype(name), + NodeData::Text { ref contents } => serializer.write_text(contents), + NodeData::Comment { ref contents } => serializer.write_comment(contents), NodeData::ProcessingInstruction { ref target, ref contents, diff --git a/src/element.rs b/src/element.rs index 08258bd..871c870 100644 --- a/src/element.rs +++ b/src/element.rs @@ -11,7 +11,7 @@ use selectors::attr::NamespaceConstraint; use selectors::context::MatchingContext; use selectors::matching::{matches_selector_list, ElementSelectorFlags}; use selectors::parser::SelectorImpl; -use selectors::{ OpaqueElement, SelectorList}; +use selectors::{OpaqueElement, SelectorList}; impl<'a> selectors::Element for Node<'a> { type Impl = InnerSelector; @@ -234,8 +234,9 @@ fn has_descendant_match( ) -> bool { let mut node = n.first_child(); while let Some(ref n) = node { - if matches_selector_list(&selectors_list, n, ctx) - || (n.is_element() && has_descendant_match(n, selectors_list, ctx)) { + if matches_selector_list(selectors_list, n, ctx) + || (n.is_element() && has_descendant_match(n, selectors_list, ctx)) + { return true; } node = n.next_sibling(); diff --git a/src/lib.rs b/src/lib.rs index 33937a3..c35cc25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,10 +27,7 @@ //! // #![deny(missing_docs)] // TODO: add this back in. -#[macro_use] extern crate html5ever; -#[macro_use] -extern crate matches; mod css; mod document; diff --git a/src/manipulation.rs b/src/manipulation.rs index 02ed3ad..d709f50 100644 --- a/src/manipulation.rs +++ b/src/manipulation.rs @@ -62,16 +62,14 @@ impl<'a> Selection<'a> { T: Into, { let dom = parse_html!(html); - let mut i = 0; - for node in self.nodes() { + for (i, node) in self.nodes().iter().enumerate() { if i + 1 == self.size() { node.append_prev_siblings_from_another_tree(dom.tree); break; } else { node.append_prev_siblings_from_another_tree(dom.tree.clone()); } - i += 1; } self.remove() @@ -97,16 +95,14 @@ impl<'a> Selection<'a> { T: Into, { let dom = parse_html!(html); - let mut i = 0; - for node in self.nodes() { + for (i, node) in self.nodes().iter().enumerate() { if i + 1 == self.size() { node.append_children_from_another_tree(dom.tree); break; } else { node.append_children_from_another_tree(dom.tree.clone()); } - i += 1; } } diff --git a/src/matcher.rs b/src/matcher.rs index e56b90e..9e48a51 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,15 +1,13 @@ use crate::css::{CssLocalName, CssString}; use crate::dom_tree::{NodeData, NodeId, NodeRef}; -use std::convert::Into; - use cssparser::ParseError; use cssparser::{self, CowRcStr, SourceLocation, ToCss}; use html5ever::Namespace; +use selectors::matching; use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; use selectors::visitor; use selectors::Element; -use selectors::{matching}; use std::collections::HashSet; use std::fmt; @@ -64,7 +62,7 @@ impl Matches { Self { roots: vec![node], nodes: vec![], - matcher: matcher, + matcher, set: HashSet::new(), match_scope, } @@ -78,7 +76,7 @@ impl Matches { Self { roots: nodes.collect(), nodes: vec![], - matcher: matcher, + matcher, set: HashSet::new(), match_scope, } diff --git a/src/query.rs b/src/query.rs index 46d5f91..3dff84d 100644 --- a/src/query.rs +++ b/src/query.rs @@ -21,7 +21,7 @@ impl<'a> Selection<'a> { if self.length() > 0 { return self .nodes() - .into_iter() + .iter() .filter(|node| matcher.match_element(*node)) .count() > 0; diff --git a/src/selection.rs b/src/selection.rs index f533326..f70083b 100644 --- a/src/selection.rs +++ b/src/selection.rs @@ -3,17 +3,11 @@ use crate::dom_tree::Node; /// Selection represents a collection of nodes matching some criteria. The /// initial Selection object can be created by using [`Document::select`], and then /// manipulated using methods itself. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Selection<'a> { pub(crate) nodes: Vec>, } -impl<'a> Default for Selection<'a> { - fn default() -> Self { - Self { nodes: vec![] } - } -} - impl<'a> From> for Selection<'a> { fn from(node: Node<'a>) -> Selection { Self { nodes: vec![node] } diff --git a/src/traversal.rs b/src/traversal.rs index 7dbccff..abdc05c 100644 --- a/src/traversal.rs +++ b/src/traversal.rs @@ -16,7 +16,7 @@ impl Document { let matcher = Matcher::new(sel).expect("Invalid CSS selector"); let root = self.tree.root(); Selection { - nodes: Matches::from_one(root, matcher.clone(), MatchScope::IncludeNode).collect(), + nodes: Matches::from_one(root, matcher, MatchScope::IncludeNode).collect(), } } @@ -37,8 +37,8 @@ impl Document { Ok(matcher) => { let root = self.tree.root(); let nodes: Vec = - Matches::from_one(root, matcher.clone(), MatchScope::ChildrenOnly).collect(); - if nodes.len() > 0 { + Matches::from_one(root, matcher, MatchScope::ChildrenOnly).collect(); + if !nodes.is_empty() { Some(Selection { nodes }) } else { None @@ -101,7 +101,7 @@ impl<'a> Selection<'a> { MatchScope::ChildrenOnly, ) .collect(); - if nodes.len() > 0 { + if !nodes.is_empty() { Some(Selection { nodes }) } else { None From 521982adeec499e6edd024df8687528659589f68 Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 19:08:22 +0200 Subject: [PATCH 08/12] add tests for pseudo-classes --- tests/pseudo-class.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/pseudo-class.rs diff --git a/tests/pseudo-class.rs b/tests/pseudo-class.rs new file mode 100644 index 0000000..27841d6 --- /dev/null +++ b/tests/pseudo-class.rs @@ -0,0 +1,33 @@ +use nipper::Document; + +#[test] +fn test_pseudo_class_has() { + let html = r#" +
+ One + Two + Three +
"#; + let document = Document::from(html); + let sel = r#"div:has(a[href="/1"]) a span"#; + let span = document.select(sel); + + let text: &str = &span.text(); + assert!(text == "Three"); +} + +#[test] +fn test_pseudo_class_has_any_link() { + let html = r#" +
+ One + Two + Three +
"#; + let document = Document::from(html); + let sel = r#"div:has(*:any-link) a span"#; + let span = document.select(sel); + + let text: &str = &span.text(); + assert!(text == "Three"); +} From 52b163f0652793f9ce86b364c14c8ccd0dad71bb Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 25 Jan 2023 21:57:18 +0200 Subject: [PATCH 09/12] src/matcher.rs: clean NonTSPseudoClass.to_css --- src/matcher.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/matcher.rs b/src/matcher.rs index 9e48a51..90212f0 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -218,7 +218,6 @@ impl ToCss for NonTSPseudoClass { where W: fmt::Write, { - println!("{:?}", self); match self { NonTSPseudoClass::AnyLink => dest.write_str(":any-link"), NonTSPseudoClass::Link => dest.write_str(":link"), @@ -231,7 +230,6 @@ impl ToCss for NonTSPseudoClass { NonTSPseudoClass::Checked => dest.write_str(":checked"), NonTSPseudoClass::Indeterminate => dest.write_str(":indeterminate"), NonTSPseudoClass::Has(list) => { - println!("{}", list.to_css_string()); dest.write_str("has:(")?; list.to_css(dest)?; dest.write_str(")") From e91808b7ef963473f4f3c9d91b91bf9f3c7358d0 Mon Sep 17 00:00:00 2001 From: niklak Date: Wed, 22 Feb 2023 16:30:47 +0200 Subject: [PATCH 10/12] switch Matches.set to IndexSet --- Cargo.toml | 3 +++ src/matcher.rs | 12 ++++++++---- tests/pseudo-class.rs | 4 ++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 46b6050..18fade8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,9 @@ cssparser = "0.28.1" tendril = "0.4.2" markup5ever = "0.11.0" +indexmap = "1.9.2" +fxhash = "0.2.1" + [dev-dependencies] reqwest = { version = "0.11.3", features = ["blocking"] } regex = "1.4.5" diff --git a/src/matcher.rs b/src/matcher.rs index 90212f0..4236bfa 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -8,9 +8,13 @@ use selectors::matching; use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; use selectors::visitor; use selectors::Element; -use std::collections::HashSet; use std::fmt; +use fxhash::FxBuildHasher; +use indexmap::IndexSet; + +pub type NodeIdSet = IndexSet; + /// CSS selector. #[derive(Clone, Debug)] pub struct Matcher { @@ -46,7 +50,7 @@ pub struct Matches { roots: Vec, nodes: Vec, matcher: Matcher, - set: HashSet, + set: NodeIdSet, match_scope: MatchScope, } @@ -63,7 +67,7 @@ impl Matches { roots: vec![node], nodes: vec![], matcher, - set: HashSet::new(), + set: NodeIdSet::default(), match_scope, } } @@ -77,7 +81,7 @@ impl Matches { roots: nodes.collect(), nodes: vec![], matcher, - set: HashSet::new(), + set: NodeIdSet::default(), match_scope, } } diff --git a/tests/pseudo-class.rs b/tests/pseudo-class.rs index 27841d6..fd5fa5a 100644 --- a/tests/pseudo-class.rs +++ b/tests/pseudo-class.rs @@ -16,7 +16,7 @@ fn test_pseudo_class_has() { assert!(text == "Three"); } -#[test] +#[test ] fn test_pseudo_class_has_any_link() { let html = r#"
@@ -30,4 +30,4 @@ fn test_pseudo_class_has_any_link() { let text: &str = &span.text(); assert!(text == "Three"); -} +} \ No newline at end of file From 73dcf34dbcacae71216918e24683b57b2166432a Mon Sep 17 00:00:00 2001 From: Nik Date: Thu, 21 Dec 2023 13:05:46 +0200 Subject: [PATCH 11/12] src/dom_tree.rs: apply clippy --- src/dom_tree.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dom_tree.rs b/src/dom_tree.rs index 2540a7b..8baee95 100644 --- a/src/dom_tree.rs +++ b/src/dom_tree.rs @@ -296,7 +296,7 @@ impl Tree { let last_child_id = fix_id!(root.last_child); // Update new parent's first and last child id. - let mut parent = get_node_unchecked_mut!(nodes, id); + let parent = get_node_unchecked_mut!(nodes, id); if parent.first_child.is_none() { parent.first_child = first_child_id; } @@ -306,7 +306,7 @@ impl Tree { // Update next_sibling_id if let Some(last_child_id) = parent_last_child_id { - let mut last_child = get_node_unchecked_mut!(nodes, last_child_id); + let last_child = get_node_unchecked_mut!(nodes, last_child_id); last_child.next_sibling = first_child_id; } @@ -371,7 +371,7 @@ impl Tree { let first_child_id = fix_id!(root.first_child); let last_child_id = fix_id!(root.last_child); - let mut node = get_node_unchecked_mut!(nodes, id); + let node = get_node_unchecked_mut!(nodes, id); let prev_sibling_id = node.prev_sibling; let parent_id = node.parent; @@ -380,11 +380,11 @@ impl Tree { // Update prev sibling's next sibling if let Some(prev_sibling_id) = prev_sibling_id { - let mut prev_sibling = get_node_unchecked_mut!(nodes, prev_sibling_id); + let prev_sibling = get_node_unchecked_mut!(nodes, prev_sibling_id); prev_sibling.next_sibling = first_child_id; // Update parent's first child. } else if let Some(parent_id) = parent_id { - let mut parent = get_node_unchecked_mut!(nodes, parent_id); + let parent = get_node_unchecked_mut!(nodes, parent_id); parent.first_child = first_child_id; } @@ -499,7 +499,7 @@ impl Tree { node.last_child = None; if let Some(new_parent_id) = new_parent_id { - let mut new_parent = get_node_unchecked_mut!(nodes, new_parent_id); + let new_parent = get_node_unchecked_mut!(nodes, new_parent_id); new_parent.first_child = first_child_id; new_parent.last_child = last_child_id; } From 1ab6a50bda4eaa6051b744c7f8b8ab35a668f088 Mon Sep 17 00:00:00 2001 From: Nik Date: Thu, 21 Dec 2023 15:43:19 +0200 Subject: [PATCH 12/12] Cargo.toml: update dependencies, src/*.rs: update code --- Cargo.toml | 8 ++++---- src/element.rs | 17 +++++++++++------ src/matcher.rs | 26 ++++++++++++++++++++------ tests/pseudo-class.rs | 4 ++-- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 18fade8..33cc120 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,17 +14,17 @@ readme = "README.md" [dependencies] html5ever = "0.26.0" -selectors = "0.23.0" -cssparser = "0.28.1" +selectors = "0.25.0" +cssparser = "0.31.2" tendril = "0.4.2" markup5ever = "0.11.0" -indexmap = "1.9.2" +indexmap = "2.1.0" fxhash = "0.2.1" [dev-dependencies] reqwest = { version = "0.11.3", features = ["blocking"] } regex = "1.4.5" lazy_static = "1.4.0" -readability = "0.2.0" +readability = "0.3.0" url = "2.2.1" diff --git a/src/element.rs b/src/element.rs index 871c870..5f9c1e7 100644 --- a/src/element.rs +++ b/src/element.rs @@ -113,15 +113,11 @@ impl<'a> selectors::Element for Node<'a> { }) } - fn match_non_ts_pseudo_class( + fn match_non_ts_pseudo_class( &self, pseudo: &::NonTSPseudoClass, context: &mut MatchingContext, - _flags_setter: &mut F, - ) -> bool - where - F: FnMut(&Self, ElementSelectorFlags), - { + ) -> bool { use self::NonTSPseudoClass::*; match pseudo { Active | Focus | Hover | Enabled | Disabled | Checked | Indeterminate | Visited => { @@ -225,6 +221,15 @@ impl<'a> selectors::Element for Node<'a> { fn is_root(&self) -> bool { self.is_document() } + + fn first_element_child(&self) -> Option { + self.children() + .iter() + .find(|&child| child.is_element()) + .cloned() + } + + fn apply_selector_flags(&self, _flags: ElementSelectorFlags) {} } fn has_descendant_match( diff --git a/src/matcher.rs b/src/matcher.rs index 4236bfa..2168b8d 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -4,10 +4,10 @@ use crate::dom_tree::{NodeData, NodeId, NodeRef}; use cssparser::ParseError; use cssparser::{self, CowRcStr, SourceLocation, ToCss}; use html5ever::Namespace; -use selectors::matching; use selectors::parser::{self, SelectorList, SelectorParseErrorKind}; use selectors::visitor; use selectors::Element; +use selectors::{matching, NthIndexCache}; use std::fmt; use fxhash::FxBuildHasher; @@ -26,19 +26,26 @@ impl Matcher { pub fn new(sel: &str) -> Result> { let mut input = cssparser::ParserInput::new(sel); let mut parser = cssparser::Parser::new(&mut input); - selectors::parser::SelectorList::parse(&InnerSelectorParser, &mut parser) - .map(|selector_list| Matcher { selector_list }) + selectors::parser::SelectorList::parse( + &InnerSelectorParser, + &mut parser, + parser::ParseRelative::ForNesting, + ) + .map(|selector_list| Matcher { selector_list }) } pub(crate) fn match_element(&self, element: &E) -> bool where E: Element, { + let mut nth_cache = NthIndexCache::default(); let mut ctx = matching::MatchingContext::new( matching::MatchingMode::Normal, None, - None, + &mut nth_cache, matching::QuirksMode::NoQuirks, + matching::NeedsSelectorFlags::No, + matching::IgnoreNthChildForInvalidation::No, ); matching::matches_selector_list(&self.selector_list, element, &mut ctx) @@ -175,7 +182,8 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { arguments: &mut cssparser::Parser<'i, 't>, ) -> Result> { if name.starts_with("has") { - let list: SelectorList = SelectorList::parse(self, arguments)?; + let list: SelectorList = + SelectorList::parse(self, arguments, parser::ParseRelative::No)?; Ok(NonTSPseudoClass::Has(list)) } else { Err(arguments.new_custom_error( @@ -189,7 +197,7 @@ impl<'i> parser::Parser<'i> for InnerSelectorParser { pub struct InnerSelector; impl parser::SelectorImpl for InnerSelector { - type ExtraMatchingData = String; + type ExtraMatchingData<'a> = (); type AttrValue = CssString; type Identifier = CssLocalName; type LocalName = CssLocalName; @@ -240,6 +248,12 @@ impl ToCss for NonTSPseudoClass { } } } + + fn to_css_string(&self) -> String { + let mut s = String::new(); + self.to_css(&mut s).unwrap(); + s + } } impl parser::NonTSPseudoClass for NonTSPseudoClass { diff --git a/tests/pseudo-class.rs b/tests/pseudo-class.rs index fd5fa5a..27841d6 100644 --- a/tests/pseudo-class.rs +++ b/tests/pseudo-class.rs @@ -16,7 +16,7 @@ fn test_pseudo_class_has() { assert!(text == "Three"); } -#[test ] +#[test] fn test_pseudo_class_has_any_link() { let html = r#"
@@ -30,4 +30,4 @@ fn test_pseudo_class_has_any_link() { let text: &str = &span.text(); assert!(text == "Three"); -} \ No newline at end of file +}