v6/src/activitypub/util/rewrite_srcs.rs

81 lines
2.6 KiB
Rust

use html5ever::serialize::SerializeOpts;
use html5ever::tendril::TendrilSink;
use html5ever::{local_name, namespace_url, ns, parse_fragment, serialize, ParseOpts, QualName};
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
use std::collections::VecDeque;
use std::rc::Rc;
use url::Url;
/// Resolves the `src` attribute of every `<img>` and `<source>` element in an
/// HTML fragment against `base_url` and returns the re-serialized fragment.
///
/// `html` is parsed as a fragment in the context of a `<div>` element. For each
/// matching element, every un-prefixed, un-namespaced `src` attribute is
/// replaced with the result of [`Url::join`]; values that `join` rejects are
/// left as they were. No other attribute (for example `srcset`) is touched.
///
/// # Panics
///
/// Panics if serialization fails, if the serialized bytes are not valid UTF-8,
/// or if the serialized tree is not wrapped in `<html>`…`</html>`.
pub fn rewrite(html: &str, base_url: &Url) -> String {
    let dom = parse_fragment(
        RcDom::default(),
        ParseOpts::default(),
        QualName::new(None, ns!(html), local_name!("div")),
        vec![],
    )
    .one(html);

    // Breadth-first walk over the whole tree, starting at the document node.
    let mut nodes = VecDeque::<Handle>::new();
    nodes.push_back(Rc::clone(&dom.document));
    while let Some(front) = nodes.pop_front() {
        if let NodeData::Element { ref name, ref attrs, .. } = front.data
            && let QualName { prefix: None, ns: ns!(html), local: local_name!("img") | local_name!("source") } = name
        {
            for attr in attrs.borrow_mut().iter_mut() {
                // The tendril derefs to `str`, so it can be joined without an
                // intermediate `String`.
                if let QualName { prefix: None, ns: ns!(), local: local_name!("src") } = attr.name
                    && let Ok(res) = base_url.join(&attr.value)
                {
                    attr.value = res.as_str().into();
                }
            }
        }
        nodes.extend(front.children.borrow().iter().map(Rc::clone));
    }

    let mut output = vec![];
    let document: SerializableHandle = Rc::clone(&dom.document).into();
    serialize(&mut output, &document, SerializeOpts::default())
        .expect("failed to serialize the HTML tree into memory");
    let serialized = String::from_utf8(output).expect("serialized HTML is not valid UTF-8");

    // The serialized document wraps the fragment in a root `<html>` element;
    // verify both ends of that wrapper before removing it.
    serialized
        .strip_prefix("<html>")
        .and_then(|rest| rest.strip_suffix("</html>"))
        .expect("serialized fragment is not wrapped in <html>...</html>")
        .to_owned()
}
#[cfg(test)]
mod tests {
    use super::rewrite;
    use url::Url;

    /// Pins down the `Url::join` behavior that `rewrite` depends on.
    #[test]
    fn url_test() {
        let base = Url::parse("https://example.com/a/b").unwrap();

        // An absolute path keeps the base's origin and replaces its path.
        let same_host = base.join("/c/d").unwrap();
        let expected_same_host = Url::parse("https://example.com/c/d").unwrap();
        assert_eq!(same_host, expected_same_host);

        // A full URL wins over the base entirely.
        let other_host = base.join("https://two.example.com/c/d").unwrap();
        let expected_other_host = Url::parse("https://two.example.com/c/d").unwrap();
        assert_eq!(other_host, expected_other_host);
    }

    /// An `<img>` with an absolute-path `src` is rewritten to a full URL while
    /// the surrounding markup is preserved.
    #[test]
    fn test_rewrite() {
        let base = Url::parse("https://example.com/a/b/").unwrap();
        let source = r#"<aside><p>a</p><img src="/a/b/c.png"><p>b</p></aside>"#;
        let expected =
            r#"<aside><p>a</p><img src="https://example.com/a/b/c.png"><p>b</p></aside>"#;

        let result = rewrite(source, &base);

        assert_eq!(result, expected);
    }
}