// v6/src/generator/posts/parse.rs
//
// 344 lines
// 9.2 KiB
// Rust

use crate::generator;
use crate::generator::markdown;
use crate::generator::util::slugify::slugify;
use crate::generator::util::{from_frontmatter, word_count};
use anyhow::anyhow;
use chrono::{DateTime, Datelike, FixedOffset};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::de::{SeqAccess, Visitor};
use serde::Deserialize;
use std::fs;
use std::hash::Hash;
use std::io::Read;
use std::path::PathBuf;
pub async fn parse() -> anyhow::Result<Vec<Post<HtmlContent>>> {
let posts_path = generator::content_path("posts/");
let mut posts = vec![];
for ent in fs::read_dir(posts_path)? {
let ent = ent.unwrap();
let path: PathBuf;
if ent.file_type().unwrap().is_dir() {
path = find_index(ent.path()).expect("folder posts must have index file");
} else {
path = ent.path();
}
let post = parse_post(path)?;
let prepared = prepare_post(post);
posts.push(prepared);
}
posts.sort_by(|a, b| b.metadata.date.cmp(&a.metadata.date));
Ok(posts)
}
/// Returns the first entry of the directory `path` whose file stem equals
/// `index`, compared ASCII case-insensitively.
///
/// Yields `None` when the directory cannot be opened or holds no such entry.
///
/// # Panics
///
/// Panics if an individual directory entry cannot be read, or if an entry's
/// path has no file stem.
fn find_index(path: PathBuf) -> Option<PathBuf> {
    let entries = std::fs::read_dir(path).ok()?;
    for entry in entries {
        let candidate = entry.unwrap().path();
        let stem = candidate.file_stem().unwrap();
        if stem.eq_ignore_ascii_case("index") {
            return Some(candidate);
        }
    }
    None
}
/// Reads the file at `path` as UTF-8 text and builds an unrendered post from it.
///
/// # Errors
///
/// Fails if the file cannot be opened or read, or if `Post::new` rejects its
/// contents.
pub fn parse_post(path: PathBuf) -> anyhow::Result<Post<AnyContent>> {
    let mut source = String::new();
    fs::File::open(&path)?.read_to_string(&mut source)?;
    Post::new(path, source.as_str())
}
/// Matches the `<!-- excerpt-end -->` marker comment; any amount of whitespace
/// (including none) may surround `excerpt-end`.
static EXCERPT_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new("<!--\\s*excerpt-end\\s*-->").unwrap());

/// Converts a parsed post to HTML and fills in its derived fields.
///
/// The word count is taken from the original content before conversion. If the
/// rendered HTML contains the excerpt marker, everything before its first
/// occurrence (trimmed) becomes the excerpt; otherwise the excerpt is untouched.
pub fn prepare_post(post: Post<AnyContent>) -> Post<HtmlContent> {
    let words = post.content.word_count();
    let mut rendered = post.to_html();
    rendered.word_count = Some(words);

    let marker_start = EXCERPT_REGEX
        .find(&rendered.content.0)
        .map(|found| found.start());
    if let Some(end) = marker_start {
        let excerpt = rendered.content.0[..end].trim().to_owned();
        rendered.excerpt = Some(excerpt);
    }

    rendered
}
/// A single post, generic over the representation of its body.
#[derive(Debug)]
pub struct Post<Content>
where
    Content: PostContent,
{
    /// Path handed to `Post::new` when the post was created.
    pub path: PathBuf,
    /// Metadata parsed from the front matter.
    pub metadata: PostMetadata,
    /// Slug used in the permalink: the explicit metadata slug, or one derived
    /// from the title.
    pub slug: String,
    /// Word count of the body; `None` until it has been computed.
    pub word_count: Option<u32>,
    /// Excerpt of the body; `None` when none has been extracted.
    pub excerpt: Option<String>,
    /// The post body.
    pub content: Content,
}
impl Post<AnyContent> {
pub fn new(path: PathBuf, contents: &str) -> anyhow::Result<Self> {
let (metadata, rest_contents) = match from_frontmatter::<'_, PostMetadata>(contents) {
Ok(res) => res,
Err(e) => return Err(e),
};
let slug = metadata
.slug
.clone()
.unwrap_or_else(|| slugify(&metadata.title));
let ext = path.extension().unwrap().to_str().unwrap();
let content = content_from(ext, rest_contents)?;
Ok(Post {
path,
metadata,
slug: slug.to_owned(),
word_count: None,
excerpt: None,
content,
})
}
}
impl<C: PostContent> Post<C> {
    /// Consumes the post and returns it with its body converted to HTML; every
    /// other field is carried over unchanged.
    fn to_html(self) -> Post<HtmlContent> {
        let Post {
            path,
            metadata,
            slug,
            word_count,
            excerpt,
            content,
        } = self;
        let content = content.to_html();
        Post {
            path,
            metadata,
            slug,
            word_count,
            excerpt,
            content,
        }
    }

    /// Site-relative permalink of the form `/<year>/<slug>/`, where the year is
    /// taken from the post's date.
    pub fn permalink(&self) -> String {
        let year = self.metadata.date.year();
        format!("/{}/{}/", year, self.slug)
    }

    /// Permalink to use for the post's comments.
    ///
    /// When the metadata sets `use_old_permalink_for_comments` to `true`, this
    /// is the single old permalink; in every other case it is [`Self::permalink`].
    ///
    /// # Panics
    ///
    /// Panics if the flag is `true` but `old_permalink` is absent or does not
    /// hold exactly one entry.
    pub fn comments_permalink(&self) -> String {
        if self.metadata.use_old_permalink_for_comments == Some(true) {
            let previous = self.metadata.old_permalink.as_ref().unwrap();
            assert_eq!(previous.len(), 1);
            previous[0].clone()
        } else {
            self.permalink()
        }
    }
}
/// Front-matter metadata of a post.
///
/// Deserialization rejects any key that is not listed here.
#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct PostMetadata {
    /// Title of the post; also the source of the slug when none is given.
    pub title: String,
    /// Optional alternative title (presumably containing HTML markup; confirm
    /// against the templates that consume it).
    pub html_title: Option<String>,
    /// Optional list of tags attached to the post.
    pub tags: Option<Vec<Tag>>,
    /// Publication date; drives post ordering and the year in the permalink.
    pub date: DateTime<FixedOffset>,
    /// Optional short description (usage not visible in this file).
    pub short_desc: Option<String>,
    /// Optional explicit slug overriding the one derived from the title.
    slug: Option<String>,
    /// Optional preamble (usage not visible in this file).
    pub preamble: Option<String>,
    /// Previous permalink(s) of the post. The front matter may supply either a
    /// single string or a list of strings; absent means `None`.
    #[serde(default, deserialize_with = "deserialize_old_permalink")]
    pub old_permalink: Option<Vec<String>>,
    /// When `Some(true)`, comments are keyed by the single old permalink
    /// instead of the current one.
    pub use_old_permalink_for_comments: Option<bool>,
    /// Optional path to a card image (usage not visible in this file).
    pub card_image_path: Option<String>,
}
/// Deserializes the `old_permalink` field, which may be written either as a
/// single string or as a sequence of strings.
///
/// A lone string becomes a one-element vector; a sequence becomes a vector of
/// its elements in order. Any other input is rejected with a
/// "string or vec of strings" expectation message.
fn deserialize_old_permalink<'de, D>(deserializer: D) -> Result<Option<Vec<String>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    /// Visitor accepting a string, a sequence of strings, or an absent value.
    struct StringOrVec;

    impl<'de> Visitor<'de> for StringOrVec {
        type Value = Option<Vec<String>>;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("string or vec of strings")
        }

        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            Ok(Some(vec![value.to_owned()]))
        }

        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
        where
            A: SeqAccess<'de>,
        {
            let mut vec = Vec::with_capacity(seq.size_hint().unwrap_or(1));
            // `next_element` yields `Ok(None)` at the end of the sequence;
            // `?` surfaces any element that is not a string.
            while let Some(s) = seq.next_element::<String>()? {
                vec.push(s);
            }
            Ok(Some(vec))
        }

        fn visit_none<E>(self) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            Ok(None)
        }
    }

    // The input type is not known up front, so let the format decide which
    // visitor method to call.
    deserializer.deserialize_any(StringOrVec)
}
/// A post tag: its display name together with the slug derived from it.
///
/// Equality and hashing consider only the slug, so two tags whose names
/// slugify to the same value are treated as the same tag.
#[derive(Eq, Debug)]
pub struct Tag {
    /// Name as written in the front matter.
    pub name: String,
    /// Slugified form of `name`.
    pub slug: String,
}

impl<'de> Deserialize<'de> for Tag {
    /// Reads a plain string as the tag name and derives the slug from it.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer).map(|name| {
            let slug = slugify(&name);
            Tag { name, slug }
        })
    }
}

impl PartialEq for Tag {
    /// Two tags are equal when their slugs are equal; names are ignored.
    fn eq(&self, other: &Self) -> bool {
        self.slug.eq(&other.slug)
    }
}

impl Hash for Tag {
    /// Hashes only the slug, keeping `Hash` consistent with `PartialEq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        Hash::hash(&self.slug, state);
    }
}
/// Behaviour shared by every representation of a post body.
pub trait PostContent
where
    Self: std::fmt::Debug,
{
    /// Consumes the content and returns it as HTML.
    fn to_html(self) -> HtmlContent;

    /// Returns the number of words in the content.
    fn word_count(&self) -> u32;
}
/// A post body in either of the supported source formats.
#[derive(Debug)]
pub enum AnyContent {
    /// Body written in Markdown.
    Markdown(MarkdownContent),
    /// Body already written in HTML.
    Html(HtmlContent),
}

impl PostContent for AnyContent {
    /// Renders Markdown to HTML; HTML content is returned as it is.
    fn to_html(self) -> HtmlContent {
        match self {
            Self::Html(html) => html,
            Self::Markdown(md) => md.to_html(),
        }
    }

    /// Delegates to the word count of the wrapped content.
    fn word_count(&self) -> u32 {
        match self {
            Self::Html(html) => html.word_count(),
            Self::Markdown(md) => md.word_count(),
        }
    }
}
/// Markdown source text of a post body.
#[derive(Debug)]
pub struct MarkdownContent(String);

impl PostContent for MarkdownContent {
    /// Renders the Markdown into an in-memory buffer and wraps the result.
    ///
    /// # Panics
    ///
    /// Panics if the rendered bytes are not valid UTF-8.
    fn to_html(self) -> HtmlContent {
        let mut rendered = Vec::new();
        markdown::render(&self.0, &mut rendered);
        let html = String::from_utf8(rendered).unwrap();
        HtmlContent(html)
    }

    /// Counts the words of the Markdown source.
    fn word_count(&self) -> u32 {
        let Self(source) = self;
        word_count::markdown(source)
    }
}
/// HTML text of a post body.
#[derive(Clone, Debug)]
pub struct HtmlContent(String);

impl PostContent for HtmlContent {
    /// Already HTML, so the value is returned unchanged.
    fn to_html(self) -> HtmlContent {
        self
    }

    /// Counts the words of the HTML text.
    fn word_count(&self) -> u32 {
        let Self(markup) = self;
        word_count::html(markup)
    }
}

impl HtmlContent {
    /// Borrows the HTML as a string slice.
    pub fn html(&self) -> &str {
        self.0.as_str()
    }
}
/// Wraps `content` in the body type selected by the file `extension`.
///
/// `md` yields Markdown content and `html` yields HTML content; the match is
/// exact and case-sensitive.
///
/// # Errors
///
/// Any other extension produces an "unknown extension" error.
fn content_from(extension: &str, content: &str) -> anyhow::Result<AnyContent> {
    if extension == "md" {
        return Ok(AnyContent::Markdown(MarkdownContent(content.to_owned())));
    }
    if extension == "html" {
        return Ok(AnyContent::Html(HtmlContent(content.to_owned())));
    }
    Err(anyhow!("unknown extension {}", extension))
}
#[cfg(test)]
mod tests {
    use chrono::{DateTime, FixedOffset, TimeZone};
    use serde::Deserialize;

    /// Minimal wrapper used to deserialize a lone date value from TOML.
    #[derive(Deserialize)]
    struct DeserializedDate {
        d: DateTime<FixedOffset>,
    }

    /// Parses a TOML document into post metadata, panicking on failure.
    fn metadata_from(toml_src: &str) -> super::PostMetadata {
        toml::from_str(toml_src).unwrap()
    }

    /// A date string with an explicit UTC offset deserializes to the matching
    /// fixed-offset timestamp.
    #[test]
    fn test_deserialize_date() {
        let parsed: DeserializedDate =
            toml::from_str(r#"d = "2017-02-17 14:30:42 -0400""#).unwrap();
        let four_hours_west = FixedOffset::west(4 * 3600);
        let expected = four_hours_west.ymd(2017, 2, 17).and_hms(14, 30, 42);
        assert_eq!(parsed.d, expected);
    }

    /// `old_permalink` may be absent, a single string, or a list of strings.
    #[test]
    fn test_deserialize_old_permalink() {
        // Field omitted entirely.
        let absent = metadata_from(
            r#"
title = "Mocking HTTP Requests for iOS App UI Tests"
date = "2019-12-22 19:12:42 -0400"
"#,
        );
        assert_eq!(absent.old_permalink, None);

        // A bare string becomes a one-element list.
        let one = metadata_from(
            r#"
title = "Mocking HTTP Requests for iOS App UI Tests"
date = "2019-12-22 19:12:42 -0400"
old_permalink = "/ios/2019/mock-http-ios-ui-testing/"
"#,
        );
        let expected_one: Vec<String> = vec!["/ios/2019/mock-http-ios-ui-testing/".into()];
        assert_eq!(one.old_permalink, Some(expected_one));

        // An array keeps every element in order.
        let many = metadata_from(
            r#"
title = "Mocking HTTP Requests for iOS App UI Tests"
date = "2019-12-22 19:12:42 -0400"
old_permalink = ["/ios/2019/mock-http-ios-ui-testing/", "something else"]
"#,
        );
        let expected_many: Vec<String> = vec![
            "/ios/2019/mock-http-ios-ui-testing/".into(),
            "something else".into(),
        ];
        assert_eq!(many.old_permalink, Some(expected_many));
    }
}