use html5ever::serialize::SerializeOpts; use html5ever::tendril::TendrilSink; use html5ever::{local_name, namespace_url, ns, parse_fragment, serialize, ParseOpts, QualName}; use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; use std::collections::VecDeque; use std::rc::Rc; use url::Url; pub fn rewrite(html: &str, base_url: &Url) -> String { let dom = parse_fragment( RcDom::default(), ParseOpts::default(), QualName::new(None, ns!(html), local_name!("div")), vec![], ) .one(html); let mut nodes = VecDeque::::new(); nodes.push_back(Rc::clone(&dom.document)); while let Some(front) = nodes.pop_front() { if let NodeData::Element { ref name, ref attrs, .. } = front.data && let QualName { prefix: None, ns: ns!(html), local: local_name!("img") | local_name!("source") } =name { for attr in attrs.borrow_mut().iter_mut() { if let QualName { prefix: None, ns: ns!(), local: local_name!("src") } = attr.name && let Ok(res) = base_url.join(&attr.value.to_string()) { attr.value = res.to_string().into(); } } } let children = front.children.borrow(); nodes.reserve(children.len()); for child in children.iter() { nodes.push_back(Rc::clone(child)); } } let mut output = vec![]; let document: SerializableHandle = Rc::clone(&dom.document).into(); serialize(&mut output, &document, SerializeOpts::default()).unwrap(); assert!(output.starts_with("".as_bytes())); output.drain(0.."".len()); output.drain(output.len() - "".len()..); String::from_utf8(output).unwrap() } #[cfg(test)] mod tests { use super::rewrite; use url::Url; #[test] fn url_test() { assert_eq!( Url::parse("https://example.com/a/b") .unwrap() .join("/c/d") .unwrap(), Url::parse("https://example.com/c/d").unwrap() ); assert_eq!( Url::parse("https://example.com/a/b") .unwrap() .join("https://two.example.com/c/d") .unwrap(), Url::parse("https://two.example.com/c/d").unwrap() ); } #[test] fn test_rewrite() { let source = r#""#; let result = rewrite(source, &Url::parse("https://example.com/a/b/").unwrap()); assert_eq!( result, r#""# ); } }