81 lines
2.6 KiB
Rust
81 lines
2.6 KiB
Rust
|
use html5ever::serialize::SerializeOpts;
|
||
|
use html5ever::tendril::TendrilSink;
|
||
|
use html5ever::{local_name, namespace_url, ns, parse_fragment, serialize, ParseOpts, QualName};
|
||
|
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
|
||
|
use std::collections::VecDeque;
|
||
|
use std::rc::Rc;
|
||
|
use url::Url;
|
||
|
|
||
|
pub fn rewrite(html: &str, base_url: &Url) -> String {
|
||
|
let dom = parse_fragment(
|
||
|
RcDom::default(),
|
||
|
ParseOpts::default(),
|
||
|
QualName::new(None, ns!(html), local_name!("div")),
|
||
|
vec![],
|
||
|
)
|
||
|
.one(html);
|
||
|
|
||
|
let mut nodes = VecDeque::<Handle>::new();
|
||
|
nodes.push_back(Rc::clone(&dom.document));
|
||
|
|
||
|
while let Some(front) = nodes.pop_front() {
|
||
|
if let NodeData::Element { ref name, ref attrs, .. } = front.data
|
||
|
&& let QualName { prefix: None, ns: ns!(html), local: local_name!("img") | local_name!("source") } =name
|
||
|
{
|
||
|
for attr in attrs.borrow_mut().iter_mut() {
|
||
|
if let QualName { prefix: None, ns: ns!(), local: local_name!("src") } = attr.name
|
||
|
&& let Ok(res) = base_url.join(&attr.value.to_string()) {
|
||
|
attr.value = res.to_string().into();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
let children = front.children.borrow();
|
||
|
nodes.reserve(children.len());
|
||
|
for child in children.iter() {
|
||
|
nodes.push_back(Rc::clone(child));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
let mut output = vec![];
|
||
|
let document: SerializableHandle = Rc::clone(&dom.document).into();
|
||
|
serialize(&mut output, &document, SerializeOpts::default()).unwrap();
|
||
|
assert!(output.starts_with("<html>".as_bytes()));
|
||
|
output.drain(0.."<html>".len());
|
||
|
output.drain(output.len() - "</html>".len()..);
|
||
|
String::from_utf8(output).unwrap()
|
||
|
}
|
||
|
|
||
|
#[cfg(test)]
mod tests {
    use super::rewrite;
    use url::Url;

    /// Checks the `Url::join` behaviour for a root-relative path and for a
    /// fully qualified URL.
    #[test]
    fn url_test() {
        let base = Url::parse("https://example.com/a/b").unwrap();

        // A root-relative path replaces the base URL's whole path.
        let rooted = base.join("/c/d").unwrap();
        let expected_rooted = Url::parse("https://example.com/c/d").unwrap();
        assert_eq!(rooted, expected_rooted);

        // A fully qualified URL replaces the base entirely.
        let absolute = base.join("https://two.example.com/c/d").unwrap();
        let expected_absolute = Url::parse("https://two.example.com/c/d").unwrap();
        assert_eq!(absolute, expected_absolute);
    }

    /// Checks that an `<img>` `src` is made absolute while the surrounding
    /// markup is returned unchanged.
    #[test]
    fn test_rewrite() {
        let base = Url::parse("https://example.com/a/b/").unwrap();
        let input = r#"<aside><p>a</p><img src="/a/b/c.png"><p>b</p></aside>"#;
        let expected =
            r#"<aside><p>a</p><img src="https://example.com/a/b/c.png"><p>b</p></aside>"#;

        let rewritten = rewrite(input, &base);

        assert_eq!(rewritten, expected);
    }
}
|