split into posts

This commit is contained in:
2026-05-16 23:52:20 +02:00
parent f1d5c4a4fd
commit ac99cc724a
6 changed files with 482 additions and 440 deletions
+113
View File
@@ -0,0 +1,113 @@
//! The in-memory posts cache: rebuilt from disk at startup and after every
//! mutation, plus prev/next neighbour lookup over the visible set.
use tokio::fs;
use tracing::warn;
use crate::models::{PostInfo, PostMeta, PostNeighbor};
use crate::post::images::{cover_from, dim_for_url, extract_images};
use crate::post::parse::{excerpt_from, parse_post, reading_time};
use crate::{AppState, CachedPost};
/// Assembles the cached [`PostInfo`] for one post from its slug, parsed
/// frontmatter and markdown body.
///
/// Images are extracted from the body once; the result feeds both the cover
/// image (first image, if any) and the image count.
fn build_post_info(slug: &str, meta: &PostMeta, body: &str) -> PostInfo {
    let images = extract_images(body);
    let image_count = images.len() as u32;
    let cover_image = cover_from(&images);

    PostInfo {
        slug: slug.to_owned(),
        date: meta.date,
        title: meta.title.clone(),
        summary: meta.summary.clone(),
        tags: meta.tags.clone(),
        draft: meta.draft,
        reading_time: reading_time(body),
        excerpt: excerpt_from(meta, body),
        cover_image,
        image_count,
    }
}
/// Scans the posts directory and replaces the in-memory cache.
/// Called at startup and after any mutation (create/rename/delete).
///
/// Only `*.md` files whose stem does not start with `.` are considered.
/// Files that cannot be read or whose frontmatter does not parse are skipped
/// with a warning. If the directory cannot be opened the cache is replaced
/// with an empty list; a missing directory is treated as "no posts" and is
/// not logged, any other error is.
///
/// The resulting list is ordered newest first, ties broken by slug ascending.
pub(crate) async fn rebuild_posts_cache(state: &AppState) {
    let posts_dir = state.data_dir.join("posts");
    let mut posts: Vec<CachedPost> = Vec::new();

    match fs::read_dir(&posts_dir).await {
        Ok(mut rd) => loop {
            match rd.next_entry().await {
                Ok(Some(entry)) => {
                    if let Some(post) = load_cached_post(state, &entry.path()).await {
                        posts.push(post);
                    }
                }
                Ok(None) => break,
                Err(e) => {
                    warn!("Error iterating posts dir: {}", e);
                    break;
                }
            }
        },
        Err(e) => {
            // A missing directory just means no post has been written yet;
            // anything else (permissions, I/O) should be visible in the logs.
            if e.kind() != std::io::ErrorKind::NotFound {
                warn!("Cannot read posts dir {}: {}", posts_dir.display(), e);
            }
        }
    }

    posts.sort_by(|a, b| {
        b.info
            .date
            .cmp(&a.info.date)
            .then_with(|| a.info.slug.cmp(&b.info.slug))
    });
    // The write lock is taken only once the new list is complete.
    *state.posts_cache.write().await = posts;
}

/// Loads one directory entry as a cache record, or `None` if the entry is
/// not a post file or cannot be read/parsed.
async fn load_cached_post(state: &AppState, path: &std::path::Path) -> Option<CachedPost> {
    if path.extension().and_then(|e| e.to_str()) != Some("md") {
        return None;
    }
    let slug = path.file_stem().and_then(|s| s.to_str())?;
    if slug.starts_with('.') {
        return None;
    }
    let raw = match fs::read_to_string(path).await {
        Ok(raw) => raw,
        Err(e) => {
            warn!("Skipping unreadable post {}: {}", slug, e);
            return None;
        }
    };
    let Ok((meta, body)) = parse_post(&raw) else {
        warn!("Skipping post with bad frontmatter: {}", slug);
        return None;
    };

    let mut info = build_post_info(slug, &meta, &body);
    // Fill in the cover's pixel dimensions when they can be determined.
    if let Some(cover) = info.cover_image.as_mut() {
        if let Some(d) = dim_for_url(state, &cover.url).await {
            cover.w = Some(d.w);
            cover.h = Some(d.h);
        }
    }
    Some(CachedPost { info, body })
}
/// Looks up the posts adjacent to `slug` in cache order.
///
/// Drafts are part of the sequence only when `admin` is true. Returns
/// `(previous, next)`, where "previous" is the entry just before `slug` in
/// the cached list and "next" the one just after; both are `None` when
/// `slug` is not among the visible posts.
pub(crate) async fn neighbors_from_cache(
    state: &AppState,
    slug: &str,
    admin: bool,
) -> (Option<PostNeighbor>, Option<PostNeighbor>) {
    let cache = state.posts_cache.read().await;
    let visible: Vec<&PostInfo> = cache
        .iter()
        .map(|cached| &cached.info)
        .filter(|info| admin || !info.draft)
        .collect();

    let Some(here) = visible.iter().position(|info| info.slug == slug) else {
        return (None, None);
    };

    // Resolves an optional index into a neighbour record, if it is in range.
    let neighbor_at = |index: Option<usize>| {
        index
            .and_then(|n| visible.get(n))
            .map(|info| PostNeighbor {
                slug: info.slug.clone(),
                title: info.title.clone(),
            })
    };

    let prev = neighbor_at(here.checked_sub(1));
    let next = neighbor_at(Some(here + 1));
    (prev, next)
}
+166
View File
@@ -0,0 +1,166 @@
//! Markdown image extraction, cover selection, and the on-disk
//! image-dimension probe (header-only read, cached on `AppState`).
use std::collections::HashMap;
use crate::AppState;
use crate::models::{CoverImage, ImageDim};
/// Scan markdown for `![alt](url)` images. Returns (alt, url) pairs in order.
/// Skips inside fenced code blocks. Tolerates titles like `![alt](url "title")`.
///
/// Fences follow the CommonMark rules that matter here: a fence opened with
/// backticks is only closed by backticks (same for `~`), the closing run must
/// be at least as long as the opening one and carry no trailing text, and a
/// backtick run whose trailing text contains another backtick is inline code,
/// not a fence opener.
///
/// Images with an empty URL are dropped. Angle brackets around the URL
/// (`![a](<url>)`) are stripped.
pub(crate) fn extract_images(body: &str) -> Vec<(String, String)> {
    let mut out = Vec::new();
    // Marker character and run length of the fence we are currently inside.
    let mut open_fence: Option<(char, usize)> = None;

    for line in body.lines() {
        if let Some((marker, run, rest)) = fence_run(line.trim_start()) {
            match open_fence {
                Some((open_marker, open_run)) => {
                    if marker == open_marker && run >= open_run && rest.trim().is_empty() {
                        open_fence = None;
                    }
                    // Either the closing fence or a fence-looking line of code.
                    continue;
                }
                None if marker == '~' || !rest.contains('`') => {
                    open_fence = Some((marker, run));
                    continue;
                }
                // Inline code such as "```x``` ![img](u)": scan it normally.
                None => {}
            }
        }
        if open_fence.is_some() {
            continue;
        }

        let mut cursor = 0;
        while let Some(rel) = line[cursor..].find("![") {
            let bang = cursor + rel;
            match image_at(line, bang + 2) {
                Some((alt, url, next)) => {
                    if !url.is_empty() {
                        out.push((alt.to_string(), url.to_string()));
                    }
                    cursor = next;
                }
                // Not a complete image; resume just past the '!'.
                None => cursor = bang + 1,
            }
        }
    }
    out
}

/// Splits a leading run of three or more identical fence characters
/// (`` ` `` or `~`) off `trimmed`, returning `(marker, run_length, rest)`.
fn fence_run(trimmed: &str) -> Option<(char, usize, &str)> {
    let marker = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = trimmed.chars().take_while(|&c| c == marker).count();
    if run < 3 {
        return None;
    }
    // Both markers are ASCII, so the char count is also the byte offset.
    Some((marker, run, &trimmed[run..]))
}

/// Parses `alt](url ...)` starting at byte `alt_start` (just past `![`).
/// Returns `(alt, url, index_after_closing_paren)`.
fn image_at(line: &str, alt_start: usize) -> Option<(&str, &str, usize)> {
    let close = alt_start + line[alt_start..].find(']')?;
    let after_paren = line[close + 1..].strip_prefix('(')?;
    let url_start = close + 2;
    let paren_end = url_start + after_paren.find(')')?;

    let url_field = line[url_start..paren_end].trim();
    // Anything after the first whitespace is the optional title.
    let url = url_field
        .split_once(char::is_whitespace)
        .map_or(url_field, |(target, _title)| target)
        .trim_matches(|c| c == '<' || c == '>');
    Some((&line[alt_start..close], url, paren_end + 1))
}
/// Picks the cover image for a post: the first `(alt, url)` pair, if any.
/// Dimensions are left unset here.
pub(crate) fn cover_from(images: &[(String, String)]) -> Option<CoverImage> {
    let (alt, url) = images.first()?;
    Some(CoverImage {
        url: url.clone(),
        alt: alt.clone(),
        w: None,
        h: None,
    })
}
/// Probe an uploads-relative URL for image dimensions. Reads only header
/// bytes via `imagesize::size`, off the runtime via `spawn_blocking`.
///
/// Returns `None` when the URL is not under `/uploads/`, when the remaining
/// name is empty or looks like a path escape (`..`, a backslash, or a leading
/// `/`), or when the probe fails.
async fn compute_dim_from_url(state: &AppState, url: &str) -> Option<ImageDim> {
    let name = url.strip_prefix("/uploads/")?;
    let suspicious = name.is_empty()
        || name.starts_with('/')
        || name.contains("..")
        || name.contains('\\');
    if suspicious {
        return None;
    }

    let path = state.data_dir.join("uploads").join(name);
    let probe = tokio::task::spawn_blocking(move || imagesize::size(&path).ok());
    match probe.await {
        Ok(Some(size)) => Some(ImageDim {
            w: size.width as u32,
            h: size.height as u32,
        }),
        // Either the blocking task failed or the file could not be probed.
        _ => None,
    }
}
/// Returns cached dim if present, else probes the file and caches the result.
/// A failed probe yields `None` and is not cached.
pub(crate) async fn dim_for_url(state: &AppState, url: &str) -> Option<ImageDim> {
    // The read guard is a temporary and is released at the end of this statement.
    let cached = state.image_dims_cache.read().await.get(url).copied();
    if cached.is_some() {
        return cached;
    }

    let probed = compute_dim_from_url(state, url).await?;
    let mut cache = state.image_dims_cache.write().await;
    cache.insert(url.to_string(), probed);
    Some(probed)
}
/// Returns a map of `url -> ImageDim` for the given URLs, using the cache
/// and probing only the URLs that aren't cached yet.
///
/// Each distinct URL is looked up and, if needed, probed at most once per
/// call, even when it appears several times in `urls`. URLs whose probe
/// fails are absent from the result and are not cached.
pub(crate) async fn dims_for_urls(state: &AppState, urls: &[String]) -> HashMap<String, ImageDim> {
    let mut out: HashMap<String, ImageDim> = HashMap::new();
    let mut missing: Vec<&str> = Vec::new();
    {
        let cache = state.image_dims_cache.read().await;
        let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
        for url in urls {
            // `insert` returns false for a URL we have already handled.
            if !seen.insert(url.as_str()) {
                continue;
            }
            match cache.get(url) {
                Some(d) => {
                    out.insert(url.clone(), *d);
                }
                None => missing.push(url.as_str()),
            }
        }
    }
    if missing.is_empty() {
        return out;
    }

    // Probe without holding any lock; take the write lock once at the end.
    let mut newly: Vec<(&str, ImageDim)> = Vec::new();
    for url in missing {
        if let Some(d) = compute_dim_from_url(state, url).await {
            newly.push((url, d));
        }
    }
    if !newly.is_empty() {
        let mut cache = state.image_dims_cache.write().await;
        for (url, d) in newly {
            cache.insert(url.to_string(), d);
            out.insert(url.to_string(), d);
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::{cover_from, extract_images};

    /// Builds an owned `(alt, url)` pair from string slices.
    fn pair(alt: &str, url: &str) -> (String, String) {
        (alt.to_owned(), url.to_owned())
    }

    #[test]
    fn extract_images_skips_fences_and_strips_titles() {
        let md = "intro\n\
                  ![a](/u/one.png)\n\
                  ```\n\
                  ![skip](/u/hidden.png)\n\
                  ```\n\
                  ![c](/u/two.png \"a title\")";
        let expected = vec![pair("a", "/u/one.png"), pair("c", "/u/two.png")];
        assert_eq!(extract_images(md), expected);
    }

    #[test]
    fn cover_from_takes_first_or_none() {
        // No images means no cover.
        assert!(cover_from(&[]).is_none());

        let images = [pair("alt", "/u/first.png")];
        let cover = cover_from(&images).unwrap();
        assert_eq!(cover.url, "/u/first.png");
        assert_eq!(cover.alt, "alt");
    }
}
+13
View File
@@ -0,0 +1,13 @@
//! Post domain logic, split out of the HTTP layer.
//!
//! - [`parse`] — slug validation, frontmatter split/parse/serialize, reading
//! time, excerpt. Pure, no I/O.
//! - [`images`] — markdown image extraction, cover selection, and the
//! filesystem image-dimension probe + cache.
//! - [`cache`] — the in-memory posts cache (rebuild + neighbour lookup).
//!
//! `handlers::posts` stays thin and only orchestrates these.
pub mod cache;
pub mod images;
pub mod parse;
+179
View File
@@ -0,0 +1,179 @@
//! Pure post parsing: slug validation, YAML frontmatter, reading time,
//! excerpt. No filesystem or network access — trivially unit-testable.
use crate::error::AppError;
use crate::models::PostMeta;
/// Reading speed used by `reading_time` to turn a word count into minutes.
const WORDS_PER_MINUTE: u32 = 200;
/// Longest slug `validate_slug` accepts. The check compares against
/// `str::len`, so the limit is measured in UTF-8 bytes.
const MAX_SLUG_LEN: usize = 100;
/// Reserved Windows device names. `validate_slug` rejects a slug whose part
/// before the first `.` matches one of these, ignoring ASCII case.
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
"COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];
/// Checks that `s` is acceptable as a post slug (used as a file stem).
///
/// Rules, checked in this order; the first violation decides the message:
/// non-empty, at most `MAX_SLUG_LEN` bytes, no leading `.`, no trailing `.`
/// or space, no `..`, no control characters or any of `/ \ < > : " | ? *`,
/// and the part before the first `.` must not be a reserved Windows device
/// name (ASCII case-insensitive).
///
/// # Errors
/// Returns `AppError::BadRequest` with a human-readable reason.
pub(crate) fn validate_slug(s: &str) -> Result<(), AppError> {
    fn reject(reason: String) -> Result<(), AppError> {
        Err(AppError::BadRequest(reason))
    }

    if s.is_empty() {
        return reject("Slug is empty".to_string());
    }
    if s.len() > MAX_SLUG_LEN {
        return reject(format!("Slug exceeds {} characters", MAX_SLUG_LEN));
    }
    if s.starts_with('.') {
        return reject("Slug cannot start with '.'".to_string());
    }
    if s.ends_with(['.', ' ']) {
        return reject("Slug cannot end with '.' or space".to_string());
    }
    if s.contains("..") {
        return reject("Slug cannot contain '..'".to_string());
    }

    // The first offending character, in string order, picks the message.
    let offending = s.chars().find(|&c| {
        c.is_control() || matches!(c, '/' | '\\' | '<' | '>' | ':' | '"' | '|' | '?' | '*')
    });
    if let Some(c) = offending {
        return reject(if c.is_control() {
            "Slug contains control characters".to_string()
        } else {
            format!("Slug contains invalid character '{}'", c)
        });
    }

    let stem = s.split_once('.').map_or(s, |(head, _)| head);
    if WINDOWS_RESERVED
        .iter()
        .any(|name| name.eq_ignore_ascii_case(stem))
    {
        return reject("Slug is a reserved name".to_string());
    }
    Ok(())
}
/// Splits a raw post into its YAML frontmatter and markdown body.
///
/// The input must start with a `---` line. The frontmatter ends at the first
/// following line that consists solely of `---`; that line may end in LF,
/// CRLF, or the end of input. Line endings are handled per line, so files
/// with mixed endings are split at the correct marker.
///
/// Returns `(yaml, body)`, where `yaml` excludes the line ending before the
/// closing marker and `body` has leading blank lines (`\n` / `\r`) removed.
/// Returns `None` when the opening or closing marker is missing.
pub(crate) fn split_frontmatter(raw: &str) -> Option<(&str, &str)> {
    let rest = raw
        .strip_prefix("---\n")
        .or_else(|| raw.strip_prefix("---\r\n"))?;

    // Byte offset of the start of the line currently being examined.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        let content = line.strip_suffix('\n').unwrap_or(line);
        let content = content.strip_suffix('\r').unwrap_or(content);
        if content == "---" {
            let yaml = &rest[..offset];
            let yaml = yaml.strip_suffix('\n').unwrap_or(yaml);
            let yaml = yaml.strip_suffix('\r').unwrap_or(yaml);
            let body = rest[offset + line.len()..].trim_start_matches(['\r', '\n']);
            return Some((yaml, body));
        }
        offset += line.len();
    }
    None
}
/// Parses a raw post file into its frontmatter metadata and owned body text.
///
/// # Errors
/// Returns `AppError::Internal` when the `---` frontmatter block is missing
/// or when its YAML does not deserialize into `PostMeta`.
pub(crate) fn parse_post(raw: &str) -> Result<(PostMeta, String), AppError> {
    let Some((yaml, body)) = split_frontmatter(raw) else {
        return Err(AppError::Internal(
            "Missing frontmatter".to_string(),
            Some("post is missing the YAML --- block".to_string()),
        ));
    };

    match serde_yaml::from_str::<PostMeta>(yaml) {
        Ok(meta) => Ok((meta, body.to_string())),
        Err(e) => Err(AppError::Internal(
            "Invalid frontmatter".to_string(),
            Some(format!("YAML parse error: {}", e)),
        )),
    }
}
/// Renders a post back to its on-disk form: a `---` line, the YAML produced
/// from `meta`, a closing `---` line, then `body` unchanged.
///
/// # Errors
/// Returns `AppError::Internal` if `meta` cannot be serialized to YAML.
pub(crate) fn serialize_post(meta: &PostMeta, body: &str) -> Result<String, AppError> {
    let yaml = match serde_yaml::to_string(meta) {
        Ok(yaml) => yaml,
        Err(e) => {
            return Err(AppError::Internal(
                "Serialization error".to_string(),
                Some(e.to_string()),
            ))
        }
    };

    let mut doc = String::with_capacity(yaml.len() + body.len() + 8);
    doc.push_str("---\n");
    doc.push_str(&yaml);
    doc.push_str("---\n");
    doc.push_str(body);
    Ok(doc)
}
pub(crate) fn reading_time(body: &str) -> u32 {
let words = body.split_whitespace().count() as u32;
(words + WORDS_PER_MINUTE - 1) / WORDS_PER_MINUTE.max(1)
}
/// Short teaser text for a post.
///
/// Uses the trimmed frontmatter summary when it is non-blank. Otherwise the
/// body is flattened to one line (`#`, `*`, `_`, `` ` `` and carriage returns
/// removed, newlines turned into spaces), trimmed, and cut to at most 200
/// characters. A cut excerpt has trailing whitespace removed before `...`
/// is appended, so the ellipsis always follows text directly.
pub(crate) fn excerpt_from(meta: &PostMeta, body: &str) -> String {
    const MAX_CHARS: usize = 200;

    let summary = meta
        .summary
        .as_deref()
        .map(str::trim)
        .filter(|s| !s.is_empty());
    if let Some(summary) = summary {
        return summary.to_string();
    }

    let flattened: String = body
        .chars()
        .filter(|c| !matches!(c, '#' | '*' | '_' | '`' | '\r'))
        .map(|c| if c == '\n' { ' ' } else { c })
        .collect();
    // Trim first so leading whitespace does not use up the character budget.
    let plain = flattened.trim();

    // Byte offset of the 201st character, if there is one.
    match plain.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", plain[..cut].trim_end()),
        None => plain.to_string(),
    }
}
#[cfg(test)]
mod tests {
    use super::{parse_post, reading_time, split_frontmatter, validate_slug};
    use crate::error::AppError;

    /// Repeats `"word "` the given number of times.
    fn words(count: usize) -> String {
        "word ".repeat(count)
    }

    #[test]
    fn validate_slug_accepts_normal_slugs() {
        for good in ["hello-world", "a_b.c-123"] {
            assert!(validate_slug(good).is_ok());
        }
    }

    #[test]
    fn validate_slug_rejects_traversal_and_bad_chars() {
        let rejected = [
            "",
            "../etc",
            "with/slash",
            "back\\slash",
            "ends.",
            "trailing ",
            ".hidden",
        ];
        for bad in rejected {
            let outcome = validate_slug(bad);
            assert!(
                matches!(outcome, Err(AppError::BadRequest(_))),
                "expected {bad:?} to be rejected"
            );
        }

        // One byte over the length limit.
        assert!(validate_slug(&"x".repeat(101)).is_err());
        // Reserved device name.
        assert!(matches!(validate_slug("CON"), Err(AppError::BadRequest(_))));
    }

    #[test]
    fn split_frontmatter_handles_lf_and_crlf() {
        let lf = split_frontmatter("---\ndate: 2026-05-16\n---\nHello");
        let (yaml, body) = lf.unwrap();
        assert_eq!(yaml, "date: 2026-05-16");
        assert_eq!(body, "Hello");

        let crlf = split_frontmatter("---\r\ndate: 2026-05-16\r\n---\r\nHi");
        let (y2, b2) = crlf.unwrap();
        assert!(y2.contains("date: 2026-05-16"));
        assert_eq!(b2, "Hi");

        assert!(split_frontmatter("no frontmatter here").is_none());
    }

    #[test]
    fn parse_post_reads_meta_and_body() {
        let raw = "---\ndate: 2026-05-16\ntitle: Hello\ndraft: true\n---\nBody text";
        let (meta, body) = parse_post(raw).unwrap();
        assert_eq!(meta.title.as_deref(), Some("Hello"));
        assert!(meta.draft);
        assert_eq!(meta.date.to_string(), "2026-05-16");
        assert_eq!(body, "Body text");

        assert!(parse_post("no frontmatter").is_err());
    }

    #[test]
    fn reading_time_rounds_up_by_wpm() {
        assert_eq!(reading_time(""), 0);
        assert_eq!(reading_time("one"), 1);
        assert_eq!(reading_time(&words(200)), 1);
        assert_eq!(reading_time(&words(201)), 2);
    }
}