diff --git a/backend/src/handlers/posts.rs b/backend/src/handlers/posts.rs index a175f02..46e872d 100644 --- a/backend/src/handlers/posts.rs +++ b/backend/src/handlers/posts.rs @@ -1,334 +1,26 @@ +//! HTTP handlers for posts. Orchestration only — parsing, image handling, +//! and the cache live in [`crate::post`]. + use axum::{ Json, extract::{Path, State}, http::{HeaderMap, StatusCode}, }; use chrono::Utc; -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use tokio::fs; use tracing::{error, info, warn}; +use crate::post::cache::{neighbors_from_cache, rebuild_posts_cache}; +use crate::post::images::{cover_from, dims_for_urls, extract_images}; +use crate::post::parse::{reading_time, serialize_post, validate_slug}; use crate::{ - AppState, CachedPost, + AppState, auth::is_authed, error::AppError, - models::{ - CoverImage, CreatePostRequest, ImageDim, PostDetail, PostInfo, PostMeta, PostNeighbor, - }, + models::{CreatePostRequest, PostDetail, PostInfo, PostMeta}, }; -const WORDS_PER_MINUTE: u32 = 200; - -const MAX_SLUG_LEN: usize = 100; -const WINDOWS_RESERVED: &[&str] = &[ - "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", - "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", -]; - -fn validate_slug(s: &str) -> Result<(), AppError> { - if s.is_empty() { - return Err(AppError::BadRequest("Slug is empty".to_string())); - } - if s.len() > MAX_SLUG_LEN { - return Err(AppError::BadRequest(format!( - "Slug exceeds {} characters", - MAX_SLUG_LEN - ))); - } - if s.starts_with('.') { - return Err(AppError::BadRequest( - "Slug cannot start with '.'".to_string(), - )); - } - if s.ends_with('.') || s.ends_with(' ') { - return Err(AppError::BadRequest( - "Slug cannot end with '.' or space".to_string(), - )); - } - if s.contains("..") { - return Err(AppError::BadRequest("Slug cannot contain '..'".to_string())); - } - for c in s.chars() { - if c.is_control() { - return Err(AppError::BadRequest( - "Slug contains control characters".to_string(), - )); - } - if matches!(c, '/' | '\\' | '<' | '>' | ':' | '"' | '|' | '?' | '*') { - return Err(AppError::BadRequest(format!( - "Slug contains invalid character '{}'", - c - ))); - } - } - let stem = s.split('.').next().unwrap_or("").to_ascii_uppercase(); - if WINDOWS_RESERVED.iter().any(|r| *r == stem) { - return Err(AppError::BadRequest("Slug is a reserved name".to_string())); - } - Ok(()) -} - -fn split_frontmatter(raw: &str) -> Option<(&str, &str)> { - let raw = raw - .strip_prefix("---\n") - .or_else(|| raw.strip_prefix("---\r\n"))?; - let end_marker = raw.find("\n---\n").or_else(|| raw.find("\r\n---\r\n"))?; - let yaml = &raw[..end_marker]; - let body_start = end_marker - + raw[end_marker..] - .find("---\n") - .or_else(|| raw[end_marker..].find("---\r\n"))? - + "---\n".len(); - let body = raw[body_start..] - .trim_start_matches('\n') - .trim_start_matches('\r'); - Some((yaml, body)) -} - -fn parse_post(raw: &str) -> Result<(PostMeta, String), AppError> { - let (yaml, body) = split_frontmatter(raw).ok_or_else(|| { - AppError::Internal( - "Missing frontmatter".to_string(), - Some("post is missing the YAML --- block".to_string()), - ) - })?; - let meta: PostMeta = serde_yaml::from_str(yaml).map_err(|e| { - AppError::Internal( - "Invalid frontmatter".to_string(), - Some(format!("YAML parse error: {}", e)), - ) - })?; - Ok((meta, body.to_string())) -} - -fn serialize_post(meta: &PostMeta, body: &str) -> Result { - let yaml = serde_yaml::to_string(meta) - .map_err(|e| AppError::Internal("Serialization error".to_string(), Some(e.to_string())))?; - Ok(format!("---\n{}---\n{}", yaml, body)) -} - -fn reading_time(body: &str) -> u32 { - let words = body.split_whitespace().count() as u32; - (words + WORDS_PER_MINUTE - 1) / WORDS_PER_MINUTE.max(1) -} - -/// Scan markdown for `![alt](url)` images. Returns (alt, url) pairs in order. -/// Skips inside fenced code blocks. Tolerates titles like `![alt](url "title")`. -fn extract_images(body: &str) -> Vec<(String, String)> { - let mut out = Vec::new(); - let mut in_fence = false; - for line in body.lines() { - let trimmed = line.trim_start(); - if trimmed.starts_with("```") || trimmed.starts_with("~~~") { - in_fence = !in_fence; - continue; - } - if in_fence { - continue; - } - let bytes = line.as_bytes(); - let mut i = 0; - while i + 1 < bytes.len() { - if bytes[i] == b'!' && bytes[i + 1] == b'[' { - if let Some(rel_close) = line[i + 2..].find(']') { - let close = i + 2 + rel_close; - if close + 1 < line.len() && bytes[close + 1] == b'(' { - if let Some(rel_paren) = line[close + 2..].find(')') { - let paren_end = close + 2 + rel_paren; - let alt = line[i + 2..close].to_string(); - let url_field = line[close + 2..paren_end].trim(); - let url = url_field - .split_once(|c: char| c.is_whitespace()) - .map(|(u, _)| u) - .unwrap_or(url_field) - .trim_matches(|c| c == '<' || c == '>') - .to_string(); - if !url.is_empty() { - out.push((alt, url)); - } - i = paren_end + 1; - continue; - } - } - } - } - i += 1; - } - } - out -} - -fn cover_from(images: &[(String, String)]) -> Option { - images.first().map(|(alt, url)| CoverImage { - url: url.clone(), - alt: alt.clone(), - w: None, - h: None, - }) -} - -/// Probe an uploads-relative URL for image dimensions. Reads only header -/// bytes via `imagesize::size`, off the runtime via `spawn_blocking`. -async fn compute_dim_from_url(state: &AppState, url: &str) -> Option { - let name = url.strip_prefix("/uploads/")?; - if name.is_empty() || name.contains("..") || name.contains('\\') || name.starts_with('/') { - return None; - } - let path = state.data_dir.join("uploads").join(name); - tokio::task::spawn_blocking(move || imagesize::size(&path).ok()) - .await - .ok() - .flatten() - .map(|s| ImageDim { - w: s.width as u32, - h: s.height as u32, - }) -} - -/// Returns cached dim if present, else probes the file and caches the result. -async fn dim_for_url(state: &AppState, url: &str) -> Option { - { - let cache = state.image_dims_cache.read().await; - if let Some(d) = cache.get(url) { - return Some(*d); - } - } - let d = compute_dim_from_url(state, url).await?; - state - .image_dims_cache - .write() - .await - .insert(url.to_string(), d); - Some(d) -} - -/// Returns a map of `url -> ImageDim` for the given URLs, using the cache -/// and probing only the URLs that aren't cached yet. -async fn dims_for_urls(state: &AppState, urls: &[String]) -> HashMap { - let mut out: HashMap = HashMap::new(); - let mut missing: Vec = Vec::new(); - { - let cache = state.image_dims_cache.read().await; - for url in urls { - if out.contains_key(url) { - continue; - } - if let Some(d) = cache.get(url) { - out.insert(url.clone(), *d); - } else { - missing.push(url.clone()); - } - } - } - if missing.is_empty() { - return out; - } - let mut newly: Vec<(String, ImageDim)> = Vec::new(); - for url in &missing { - if let Some(d) = compute_dim_from_url(state, url).await { - newly.push((url.clone(), d)); - } - } - if !newly.is_empty() { - let mut cache = state.image_dims_cache.write().await; - for (url, d) in &newly { - cache.insert(url.clone(), *d); - out.insert(url.clone(), *d); - } - } - out -} - -fn excerpt_from(meta: &PostMeta, body: &str) -> String { - if let Some(s) = meta.summary.as_ref() { - if !s.trim().is_empty() { - return s.trim().to_string(); - } - } - let plain = body.replace(['#', '*', '_', '`'], "").replace('\n', " "); - let mut out: String = plain.chars().take(200).collect(); - if plain.chars().count() > 200 { - out.push_str("..."); - } - out.trim().to_string() -} - -fn build_post_info(slug: &str, meta: &PostMeta, body: &str) -> PostInfo { - let images = extract_images(body); - PostInfo { - slug: slug.to_string(), - date: meta.date, - title: meta.title.clone(), - summary: meta.summary.clone(), - tags: meta.tags.clone(), - draft: meta.draft, - reading_time: reading_time(body), - excerpt: excerpt_from(meta, body), - cover_image: cover_from(&images), - image_count: images.len() as u32, - } -} - -/// Scans the posts directory and replaces the in-memory cache. -/// Called at startup and after any mutation (create/rename/delete). -pub async fn rebuild_posts_cache(state: &AppState) { - let posts_dir = state.data_dir.join("posts"); - let mut posts: Vec = Vec::new(); - - let mut rd = match fs::read_dir(&posts_dir).await { - Ok(rd) => rd, - Err(_) => { - *state.posts_cache.write().await = posts; - return; - } - }; - - loop { - match rd.next_entry().await { - Ok(Some(entry)) => { - let path = entry.path(); - if path.extension().and_then(|e| e.to_str()) != Some("md") { - continue; - } - let Some(slug) = path.file_stem().and_then(|s| s.to_str()) else { - continue; - }; - if slug.starts_with('.') { - continue; - } - let Ok(raw) = fs::read_to_string(&path).await else { - continue; - }; - let Ok((meta, body)) = parse_post(&raw) else { - warn!("Skipping post with bad frontmatter: {}", slug); - continue; - }; - let mut info = build_post_info(slug, &meta, &body); - if let Some(cover) = info.cover_image.as_mut() { - if let Some(d) = dim_for_url(state, &cover.url).await { - cover.w = Some(d.w); - cover.h = Some(d.h); - } - } - posts.push(CachedPost { info, body }); - } - Ok(None) => break, - Err(e) => { - warn!("Error iterating posts dir: {}", e); - break; - } - } - } - - posts.sort_by(|a, b| { - b.info - .date - .cmp(&a.info.date) - .then_with(|| a.info.slug.cmp(&b.info.slug)) - }); - *state.posts_cache.write().await = posts; -} - async fn write_post_atomic(state: &AppState, slug: &str, contents: &str) -> Result<(), AppError> { let _guard = state.post_lock.lock().await; let final_path = state.data_dir.join("posts").join(format!("{}.md", slug)); @@ -489,33 +181,6 @@ pub async fn list_posts( Json(posts) } -async fn neighbors_from_cache( - state: &AppState, - slug: &str, - admin: bool, -) -> (Option, Option) { - let cache = state.posts_cache.read().await; - let visible: Vec<&PostInfo> = cache - .iter() - .filter(|p| admin || !p.info.draft) - .map(|p| &p.info) - .collect(); - let Some(i) = visible.iter().position(|p| p.slug == slug) else { - return (None, None); - }; - let to_neighbor = |p: &PostInfo| PostNeighbor { - slug: p.slug.clone(), - title: p.title.clone(), - }; - let prev = if i > 0 { - Some(to_neighbor(visible[i - 1])) - } else { - None - }; - let next = visible.get(i + 1).map(|p| to_neighbor(p)); - (prev, next) -} - pub async fn get_post( State(state): State>, headers: HeaderMap, @@ -556,98 +221,3 @@ pub async fn get_post( dimensions, })) } - -#[cfg(test)] -mod tests { - use super::{ - cover_from, extract_images, parse_post, reading_time, split_frontmatter, validate_slug, - }; - use crate::error::AppError; - - #[test] - fn validate_slug_accepts_normal_slugs() { - assert!(validate_slug("hello-world").is_ok()); - assert!(validate_slug("a_b.c-123").is_ok()); - } - - #[test] - fn validate_slug_rejects_traversal_and_bad_chars() { - for bad in [ - "", - "../etc", - "with/slash", - "back\\slash", - "ends.", - "trailing ", - ".hidden", - ] { - assert!( - matches!(validate_slug(bad), Err(AppError::BadRequest(_))), - "expected {bad:?} to be rejected" - ); - } - let too_long = "x".repeat(101); - assert!(validate_slug(&too_long).is_err()); - assert!(matches!(validate_slug("CON"), Err(AppError::BadRequest(_)))); - } - - #[test] - fn split_frontmatter_handles_lf_and_crlf() { - let (yaml, body) = split_frontmatter("---\ndate: 2026-05-16\n---\nHello").unwrap(); - assert_eq!(yaml, "date: 2026-05-16"); - assert_eq!(body, "Hello"); - - let (y2, b2) = split_frontmatter("---\r\ndate: 2026-05-16\r\n---\r\nHi").unwrap(); - assert!(y2.contains("date: 2026-05-16")); - assert_eq!(b2, "Hi"); - - assert!(split_frontmatter("no frontmatter here").is_none()); - } - - #[test] - fn parse_post_reads_meta_and_body() { - let raw = "---\ndate: 2026-05-16\ntitle: Hello\ndraft: true\n---\nBody text"; - let (meta, body) = parse_post(raw).unwrap(); - assert_eq!(meta.title.as_deref(), Some("Hello")); - assert!(meta.draft); - assert_eq!(meta.date.to_string(), "2026-05-16"); - assert_eq!(body, "Body text"); - - assert!(parse_post("no frontmatter").is_err()); - } - - #[test] - fn reading_time_rounds_up_by_wpm() { - assert_eq!(reading_time(""), 0); - assert_eq!(reading_time("one"), 1); - assert_eq!(reading_time(&"word ".repeat(200)), 1); - assert_eq!(reading_time(&"word ".repeat(201)), 2); - } - - #[test] - fn extract_images_skips_fences_and_strips_titles() { - let md = "intro\n\ - ![a](/u/one.png)\n\ - ```\n\ - ![skip](/u/hidden.png)\n\ - ```\n\ - ![c](/u/two.png \"a title\")"; - let imgs = extract_images(md); - assert_eq!( - imgs, - vec![ - ("a".to_string(), "/u/one.png".to_string()), - ("c".to_string(), "/u/two.png".to_string()), - ] - ); - } - - #[test] - fn cover_from_takes_first_or_none() { - assert!(cover_from(&[]).is_none()); - let imgs = vec![("alt".to_string(), "/u/first.png".to_string())]; - let cover = cover_from(&imgs).unwrap(); - assert_eq!(cover.url, "/u/first.png"); - assert_eq!(cover.alt, "alt"); - } -} diff --git a/backend/src/main.rs b/backend/src/main.rs index 0f894c4..bb4c92a 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -2,6 +2,7 @@ pub mod auth; pub mod error; pub mod handlers; pub mod models; +pub mod post; use axum::{ Router, @@ -76,7 +77,7 @@ async fn main() { contact_rate_limit: Mutex::new(HashMap::new()), }); - handlers::posts::rebuild_posts_cache(&state).await; + post::cache::rebuild_posts_cache(&state).await; info!( "Posts cache primed with {} entries", state.posts_cache.read().await.len() diff --git a/backend/src/post/cache.rs b/backend/src/post/cache.rs new file mode 100644 index 0000000..3626843 --- /dev/null +++ b/backend/src/post/cache.rs @@ -0,0 +1,113 @@ +//! The in-memory posts cache: rebuilt from disk at startup and after every +//! mutation, plus prev/next neighbour lookup over the visible set. + +use tokio::fs; +use tracing::warn; + +use crate::models::{PostInfo, PostMeta, PostNeighbor}; +use crate::post::images::{cover_from, dim_for_url, extract_images}; +use crate::post::parse::{excerpt_from, parse_post, reading_time}; +use crate::{AppState, CachedPost}; + +fn build_post_info(slug: &str, meta: &PostMeta, body: &str) -> PostInfo { + let images = extract_images(body); + PostInfo { + slug: slug.to_string(), + date: meta.date, + title: meta.title.clone(), + summary: meta.summary.clone(), + tags: meta.tags.clone(), + draft: meta.draft, + reading_time: reading_time(body), + excerpt: excerpt_from(meta, body), + cover_image: cover_from(&images), + image_count: images.len() as u32, + } +} + +/// Scans the posts directory and replaces the in-memory cache. +/// Called at startup and after any mutation (create/rename/delete). +pub(crate) async fn rebuild_posts_cache(state: &AppState) { + let posts_dir = state.data_dir.join("posts"); + let mut posts: Vec = Vec::new(); + + let mut rd = match fs::read_dir(&posts_dir).await { + Ok(rd) => rd, + Err(_) => { + *state.posts_cache.write().await = posts; + return; + } + }; + + loop { + match rd.next_entry().await { + Ok(Some(entry)) => { + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("md") { + continue; + } + let Some(slug) = path.file_stem().and_then(|s| s.to_str()) else { + continue; + }; + if slug.starts_with('.') { + continue; + } + let Ok(raw) = fs::read_to_string(&path).await else { + continue; + }; + let Ok((meta, body)) = parse_post(&raw) else { + warn!("Skipping post with bad frontmatter: {}", slug); + continue; + }; + let mut info = build_post_info(slug, &meta, &body); + if let Some(cover) = info.cover_image.as_mut() { + if let Some(d) = dim_for_url(state, &cover.url).await { + cover.w = Some(d.w); + cover.h = Some(d.h); + } + } + posts.push(CachedPost { info, body }); + } + Ok(None) => break, + Err(e) => { + warn!("Error iterating posts dir: {}", e); + break; + } + } + } + + posts.sort_by(|a, b| { + b.info + .date + .cmp(&a.info.date) + .then_with(|| a.info.slug.cmp(&b.info.slug)) + }); + *state.posts_cache.write().await = posts; +} + +pub(crate) async fn neighbors_from_cache( + state: &AppState, + slug: &str, + admin: bool, +) -> (Option, Option) { + let cache = state.posts_cache.read().await; + let visible: Vec<&PostInfo> = cache + .iter() + .filter(|p| admin || !p.info.draft) + .map(|p| &p.info) + .collect(); + let Some(i) = visible.iter().position(|p| p.slug == slug) else { + return (None, None); + }; + let to_neighbor = |p: &PostInfo| PostNeighbor { + slug: p.slug.clone(), + title: p.title.clone(), + }; + let prev = if i > 0 { + Some(to_neighbor(visible[i - 1])) + } else { + None + }; + let next = visible.get(i + 1).map(|p| to_neighbor(p)); + (prev, next) +} diff --git a/backend/src/post/images.rs b/backend/src/post/images.rs new file mode 100644 index 0000000..c3474e4 --- /dev/null +++ b/backend/src/post/images.rs @@ -0,0 +1,166 @@ +//! Markdown image extraction, cover selection, and the on-disk +//! image-dimension probe (header-only read, cached on `AppState`). + +use std::collections::HashMap; + +use crate::AppState; +use crate::models::{CoverImage, ImageDim}; + +/// Scan markdown for `![alt](url)` images. Returns (alt, url) pairs in order. +/// Skips inside fenced code blocks. Tolerates titles like `![alt](url "title")`. +pub(crate) fn extract_images(body: &str) -> Vec<(String, String)> { + let mut out = Vec::new(); + let mut in_fence = false; + for line in body.lines() { + let trimmed = line.trim_start(); + if trimmed.starts_with("```") || trimmed.starts_with("~~~") { + in_fence = !in_fence; + continue; + } + if in_fence { + continue; + } + let bytes = line.as_bytes(); + let mut i = 0; + while i + 1 < bytes.len() { + if bytes[i] == b'!' && bytes[i + 1] == b'[' { + if let Some(rel_close) = line[i + 2..].find(']') { + let close = i + 2 + rel_close; + if close + 1 < line.len() && bytes[close + 1] == b'(' { + if let Some(rel_paren) = line[close + 2..].find(')') { + let paren_end = close + 2 + rel_paren; + let alt = line[i + 2..close].to_string(); + let url_field = line[close + 2..paren_end].trim(); + let url = url_field + .split_once(|c: char| c.is_whitespace()) + .map(|(u, _)| u) + .unwrap_or(url_field) + .trim_matches(|c| c == '<' || c == '>') + .to_string(); + if !url.is_empty() { + out.push((alt, url)); + } + i = paren_end + 1; + continue; + } + } + } + } + i += 1; + } + } + out +} + +pub(crate) fn cover_from(images: &[(String, String)]) -> Option { + images.first().map(|(alt, url)| CoverImage { + url: url.clone(), + alt: alt.clone(), + w: None, + h: None, + }) +} + +/// Probe an uploads-relative URL for image dimensions. Reads only header +/// bytes via `imagesize::size`, off the runtime via `spawn_blocking`. +async fn compute_dim_from_url(state: &AppState, url: &str) -> Option { + let name = url.strip_prefix("/uploads/")?; + if name.is_empty() || name.contains("..") || name.contains('\\') || name.starts_with('/') { + return None; + } + let path = state.data_dir.join("uploads").join(name); + tokio::task::spawn_blocking(move || imagesize::size(&path).ok()) + .await + .ok() + .flatten() + .map(|s| ImageDim { + w: s.width as u32, + h: s.height as u32, + }) +} + +/// Returns cached dim if present, else probes the file and caches the result. +pub(crate) async fn dim_for_url(state: &AppState, url: &str) -> Option { + { + let cache = state.image_dims_cache.read().await; + if let Some(d) = cache.get(url) { + return Some(*d); + } + } + let d = compute_dim_from_url(state, url).await?; + state + .image_dims_cache + .write() + .await + .insert(url.to_string(), d); + Some(d) +} + +/// Returns a map of `url -> ImageDim` for the given URLs, using the cache +/// and probing only the URLs that aren't cached yet. +pub(crate) async fn dims_for_urls(state: &AppState, urls: &[String]) -> HashMap { + let mut out: HashMap = HashMap::new(); + let mut missing: Vec = Vec::new(); + { + let cache = state.image_dims_cache.read().await; + for url in urls { + if out.contains_key(url) { + continue; + } + if let Some(d) = cache.get(url) { + out.insert(url.clone(), *d); + } else { + missing.push(url.clone()); + } + } + } + if missing.is_empty() { + return out; + } + let mut newly: Vec<(String, ImageDim)> = Vec::new(); + for url in &missing { + if let Some(d) = compute_dim_from_url(state, url).await { + newly.push((url.clone(), d)); + } + } + if !newly.is_empty() { + let mut cache = state.image_dims_cache.write().await; + for (url, d) in &newly { + cache.insert(url.clone(), *d); + out.insert(url.clone(), *d); + } + } + out +} + +#[cfg(test)] +mod tests { + use super::{cover_from, extract_images}; + + #[test] + fn extract_images_skips_fences_and_strips_titles() { + let md = "intro\n\ + ![a](/u/one.png)\n\ + ```\n\ + ![skip](/u/hidden.png)\n\ + ```\n\ + ![c](/u/two.png \"a title\")"; + let imgs = extract_images(md); + assert_eq!( + imgs, + vec![ + ("a".to_string(), "/u/one.png".to_string()), + ("c".to_string(), "/u/two.png".to_string()), + ] + ); + } + + #[test] + fn cover_from_takes_first_or_none() { + assert!(cover_from(&[]).is_none()); + let imgs = vec![("alt".to_string(), "/u/first.png".to_string())]; + let cover = cover_from(&imgs).unwrap(); + assert_eq!(cover.url, "/u/first.png"); + assert_eq!(cover.alt, "alt"); + } +} diff --git a/backend/src/post/mod.rs b/backend/src/post/mod.rs new file mode 100644 index 0000000..cf2777f --- /dev/null +++ b/backend/src/post/mod.rs @@ -0,0 +1,13 @@ +//! Post domain logic, split out of the HTTP layer. +//! +//! - [`parse`] — slug validation, frontmatter split/parse/serialize, reading +//! time, excerpt. Pure, no I/O. +//! - [`images`] — markdown image extraction, cover selection, and the +//! filesystem image-dimension probe + cache. +//! - [`cache`] — the in-memory posts cache (rebuild + neighbour lookup). +//! +//! `handlers::posts` stays thin and only orchestrates these. + +pub mod cache; +pub mod images; +pub mod parse; diff --git a/backend/src/post/parse.rs b/backend/src/post/parse.rs new file mode 100644 index 0000000..b3ed349 --- /dev/null +++ b/backend/src/post/parse.rs @@ -0,0 +1,179 @@ +//! Pure post parsing: slug validation, YAML frontmatter, reading time, +//! excerpt. No filesystem or network access — trivially unit-testable. + +use crate::error::AppError; +use crate::models::PostMeta; + +const WORDS_PER_MINUTE: u32 = 200; +const MAX_SLUG_LEN: usize = 100; +const WINDOWS_RESERVED: &[&str] = &[ + "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", + "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", +]; + +pub(crate) fn validate_slug(s: &str) -> Result<(), AppError> { + if s.is_empty() { + return Err(AppError::BadRequest("Slug is empty".to_string())); + } + if s.len() > MAX_SLUG_LEN { + return Err(AppError::BadRequest(format!( + "Slug exceeds {} characters", + MAX_SLUG_LEN + ))); + } + if s.starts_with('.') { + return Err(AppError::BadRequest( + "Slug cannot start with '.'".to_string(), + )); + } + if s.ends_with('.') || s.ends_with(' ') { + return Err(AppError::BadRequest( + "Slug cannot end with '.' or space".to_string(), + )); + } + if s.contains("..") { + return Err(AppError::BadRequest("Slug cannot contain '..'".to_string())); + } + for c in s.chars() { + if c.is_control() { + return Err(AppError::BadRequest( + "Slug contains control characters".to_string(), + )); + } + if matches!(c, '/' | '\\' | '<' | '>' | ':' | '"' | '|' | '?' | '*') { + return Err(AppError::BadRequest(format!( + "Slug contains invalid character '{}'", + c + ))); + } + } + let stem = s.split('.').next().unwrap_or("").to_ascii_uppercase(); + if WINDOWS_RESERVED.iter().any(|r| *r == stem) { + return Err(AppError::BadRequest("Slug is a reserved name".to_string())); + } + Ok(()) +} + +pub(crate) fn split_frontmatter(raw: &str) -> Option<(&str, &str)> { + let raw = raw + .strip_prefix("---\n") + .or_else(|| raw.strip_prefix("---\r\n"))?; + let end_marker = raw.find("\n---\n").or_else(|| raw.find("\r\n---\r\n"))?; + let yaml = &raw[..end_marker]; + let body_start = end_marker + + raw[end_marker..] + .find("---\n") + .or_else(|| raw[end_marker..].find("---\r\n"))? + + "---\n".len(); + let body = raw[body_start..] + .trim_start_matches('\n') + .trim_start_matches('\r'); + Some((yaml, body)) +} + +pub(crate) fn parse_post(raw: &str) -> Result<(PostMeta, String), AppError> { + let (yaml, body) = split_frontmatter(raw).ok_or_else(|| { + AppError::Internal( + "Missing frontmatter".to_string(), + Some("post is missing the YAML --- block".to_string()), + ) + })?; + let meta: PostMeta = serde_yaml::from_str(yaml).map_err(|e| { + AppError::Internal( + "Invalid frontmatter".to_string(), + Some(format!("YAML parse error: {}", e)), + ) + })?; + Ok((meta, body.to_string())) +} + +pub(crate) fn serialize_post(meta: &PostMeta, body: &str) -> Result { + let yaml = serde_yaml::to_string(meta) + .map_err(|e| AppError::Internal("Serialization error".to_string(), Some(e.to_string())))?; + Ok(format!("---\n{}---\n{}", yaml, body)) +} + +pub(crate) fn reading_time(body: &str) -> u32 { + let words = body.split_whitespace().count() as u32; + (words + WORDS_PER_MINUTE - 1) / WORDS_PER_MINUTE.max(1) +} + +pub(crate) fn excerpt_from(meta: &PostMeta, body: &str) -> String { + if let Some(s) = meta.summary.as_ref() { + if !s.trim().is_empty() { + return s.trim().to_string(); + } + } + let plain = body.replace(['#', '*', '_', '`'], "").replace('\n', " "); + let mut out: String = plain.chars().take(200).collect(); + if plain.chars().count() > 200 { + out.push_str("..."); + } + out.trim().to_string() +} + +#[cfg(test)] +mod tests { + use super::{parse_post, reading_time, split_frontmatter, validate_slug}; + use crate::error::AppError; + + #[test] + fn validate_slug_accepts_normal_slugs() { + assert!(validate_slug("hello-world").is_ok()); + assert!(validate_slug("a_b.c-123").is_ok()); + } + + #[test] + fn validate_slug_rejects_traversal_and_bad_chars() { + for bad in [ + "", + "../etc", + "with/slash", + "back\\slash", + "ends.", + "trailing ", + ".hidden", + ] { + assert!( + matches!(validate_slug(bad), Err(AppError::BadRequest(_))), + "expected {bad:?} to be rejected" + ); + } + let too_long = "x".repeat(101); + assert!(validate_slug(&too_long).is_err()); + assert!(matches!(validate_slug("CON"), Err(AppError::BadRequest(_)))); + } + + #[test] + fn split_frontmatter_handles_lf_and_crlf() { + let (yaml, body) = split_frontmatter("---\ndate: 2026-05-16\n---\nHello").unwrap(); + assert_eq!(yaml, "date: 2026-05-16"); + assert_eq!(body, "Hello"); + + let (y2, b2) = split_frontmatter("---\r\ndate: 2026-05-16\r\n---\r\nHi").unwrap(); + assert!(y2.contains("date: 2026-05-16")); + assert_eq!(b2, "Hi"); + + assert!(split_frontmatter("no frontmatter here").is_none()); + } + + #[test] + fn parse_post_reads_meta_and_body() { + let raw = "---\ndate: 2026-05-16\ntitle: Hello\ndraft: true\n---\nBody text"; + let (meta, body) = parse_post(raw).unwrap(); + assert_eq!(meta.title.as_deref(), Some("Hello")); + assert!(meta.draft); + assert_eq!(meta.date.to_string(), "2026-05-16"); + assert_eq!(body, "Body text"); + + assert!(parse_post("no frontmatter").is_err()); + } + + #[test] + fn reading_time_rounds_up_by_wpm() { + assert_eq!(reading_time(""), 0); + assert_eq!(reading_time("one"), 1); + assert_eq!(reading_time(&"word ".repeat(200)), 1); + assert_eq!(reading_time(&"word ".repeat(201)), 2); + } +}