Static site generators¶
Static site generators benefit from deterministic SEO. Generate all SEO payloads at build time and write them to JSON files. In your templates, read the pre-generated payload instead of calling seoslug at request time. This gives you complete SEO without runtime overhead.
Hugo¶
Hugo compiles Markdown content into a static site at build time. Pairing it with seoslug gives you complete, deterministic SEO with zero runtime cost. The workflow has four parts: a config file, a generation script, Hugo's data layer, and a template partial.
Prerequisites¶
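- Python 3.11+ with `seoslug` and `python-frontmatter` installed (the generation script imports the standard-library `tomllib`, which was added in Python 3.11)
- Hugo 0.123+ (any extended version)
- A Hugo theme that provides an `extend_head.html` hook (e.g. PaperMod)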
Recommended project structure¶
site/
├── content/
│ ├── _index.md # homepage
│ ├── posts/
│ │ ├── _index.md # /posts/ listing
│ │ └── hello-world.md # /posts/hello-world/
│ └── about.md # /about/
├── data/
│ └── seo/ # generated by script (gitignored)
│ ├── _index.json # homepage payload
│ ├── posts_hello-world.json
│ └── about.json
├── layouts/
│ └── partials/
│ └── extend_head.html # reads from site.Data.seo
├── scripts/
│ └── generate_seo.py # build-time generation
├── static/
├── seoslug-config.toml
├── requirements.txt
└── Dockerfile
1. Configuration¶
Create seoslug-config.toml at the project root.
canonical_host = "blog.example.com"
public_base_url = "https://blog.example.com"
site_name = "Dev Blog"
title_template = "{title}"
default_robots = "index,follow"
publisher_name = "Your Name"
publisher_logo = "/images/logo.png"
[url_policy]
enforce_https = true
lowercase_paths = true
trailing_slash = "never"
| Key | Purpose |
|---|---|
| `canonical_host` | Domain used in canonical URLs and JSON-LD |
| `title_template` | Template string; `{title}` is replaced with the entity's title |
| `default_robots` | Fallback robots directive when a page has no explicit status |
| `url_policy` | Normalization rules for generated URLs |
2. Generation script¶
Create scripts/generate_seo.py. This script iterates over every content file, reads its frontmatter, derives the URL route and entity type, and writes one deterministic SEO JSON per page.
#!/usr/bin/env python3
"""
Build-time SEO payload generator for Hugo.
Iterates content/*.md, determines routes and entity types,
calls seoslug.build_seo_payload for each, and writes JSON
files under data/seo/ for Hugo's site.Data to consume.
Usage:
SEOSLUG_CONFIG=seoslug-config.toml python scripts/generate_seo.py
"""
from __future__ import annotations
import json
import logging
import os
import sys
import tomllib
from pathlib import Path
import frontmatter
from seoslug import SEOConfig, SEOEntity, URLPolicy, build_seo_payload
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(message)s",
datefmt="%H:%M:%S",
)
log = logging.getLogger("seoslug")
def load_config(path: str) -> SEOConfig:
    """Build an ``SEOConfig`` from the TOML file at *path*.

    The optional ``[url_policy]`` table is lifted out of the parsed document
    and turned into a ``URLPolicy``; every remaining top-level key is
    forwarded to ``SEOConfig`` as a keyword argument.
    """
    log.info("Loading config from %s", path)
    with open(path, "rb") as toml_file:
        settings = tomllib.load(toml_file)

    # A missing table yields URLPolicy() constructed with no arguments.
    policy_kwargs = settings.pop("url_policy", {})
    policy = URLPolicy(**policy_kwargs)
    return SEOConfig(url_policy=policy, **settings)
def route_for(md_path: Path, content_dir: Path) -> str:
    """Derive the URL route from a Hugo content file path.

    Hugo conventions:
        content/_index.md              → /
        content/section/_index.md      → /section/
        content/section/post.md        → /section/post/
        content/section/post/index.md  → /section/post/

    Args:
        md_path: Path of the Markdown file; must be located under
            ``content_dir``.
        content_dir: Root of the Hugo content tree.

    Returns:
        The route with a leading and a trailing slash; ``"/"`` for the
        homepage.

    Raises:
        ValueError: If ``md_path`` is not inside ``content_dir``.
    """
    rel = md_path.relative_to(content_dir)
    if rel.name in ("_index.md", "index.md"):
        # Bundle index files take the route of their directory.
        segments = rel.parent.parts
    else:
        # Regular pages take the route of the file name minus its extension.
        segments = rel.with_suffix("").parts
    if not segments:
        return "/"
    # Trailing slash matches the documented conventions above (the previous
    # implementation silently dropped it).
    return "/" + "/".join(segments) + "/"
def entity_type_for(md_path: Path, content_dir: Path = Path("content")) -> str:
    """Map Hugo content conventions to seoslug entity types.

    - content/_index.md              → home (WebSite schema)
    - content/section/_index.md      → page (WebPage schema)
    - content/section/post.md        → post (Article schema)
    - content/section/post/index.md  → post (Article schema)

    Args:
        md_path: Path of the Markdown file, expressed the same way as
            ``content_dir`` (both relative or both absolute).
        content_dir: Root of the Hugo content tree. Defaults to ``content``
            so existing single-argument callers keep working.

    Returns:
        ``"home"``, ``"page"`` or ``"post"``.
    """
    if md_path.name != "_index.md":
        return "post"
    # The homepage is the _index.md that sits directly in the content root.
    # (Comparing the parent against ``Path.root`` — a str — never matched,
    # so the homepage was always classified as "page".)
    return "home" if md_path.parent == content_dir else "page"
def key_for(md_path: Path, content_dir: Path) -> str:
    """Derive a filesystem-safe lookup key matching the Hugo template.

        content/studies/howistudy.md → studies_howistudy
        content/_index.md            → _index

    Mirrors the template pipeline ``replaceRE `\\.md$` "" | replaceRE `/` "_"``:
    only a trailing ``.md`` is removed, and only ``/`` is replaced.

    Args:
        md_path: Path of the Markdown file; must be located under
            ``content_dir``.
        content_dir: Root of the Hugo content tree.

    Returns:
        The key used both as the JSON file stem and as the ``site.Data.seo``
        map key.

    Raises:
        ValueError: If ``md_path`` is not inside ``content_dir``.
    """
    # as_posix() gives forward slashes on every platform, so keys are the same
    # on Windows as on POSIX.
    rel = md_path.relative_to(content_dir).as_posix()
    # removesuffix strips only the extension; a plain replace(".md", "")
    # would also mangle directory names that merely contain ".md".
    return rel.removesuffix(".md").replace("/", "_")
def generate_for_config(
    config_path: str,
    content_dir: Path = Path("content"),
    data_dir: Path = Path("data/seo"),
) -> int:
    """Generate one SEO JSON payload per Markdown file under ``content_dir``.

    Args:
        config_path: Path to the seoslug TOML configuration.
        content_dir: Root of the Hugo content tree to scan recursively.
        data_dir: Output directory for the JSON files; created if missing.

    Returns:
        The number of payload files written. Files that fail to parse, have
        no ``title`` in their frontmatter, or make ``build_seo_payload``
        raise are logged, counted as skipped and excluded from this number.
    """
    config = load_config(config_path)
    data_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so processing order (and therefore log output) is stable.
    md_files = sorted(content_dir.rglob("*.md"))
    if not md_files:
        log.warning("No Markdown files found under %s", content_dir)
        return 0

    generated = 0
    skipped = 0
    for md_file in md_files:
        # Broad catches below are deliberate: one broken file must not halt
        # the whole build.
        try:
            post = frontmatter.load(md_file)
        except Exception as exc:
            log.error("Failed to parse %s: %s", md_file, exc)
            skipped += 1
            continue

        meta = post.metadata
        title = meta.get("title")
        if not title:
            log.warning("Skipping %s: no title in frontmatter", md_file)
            skipped += 1
            continue

        route = route_for(md_file, content_dir)
        entity_type = entity_type_for(md_file)
        # Respect Hugo draft status — mark drafts as "draft" so seoslug
        # generates noindex directives and skips schema output.
        status = "draft" if meta.get("draft", False) else "published"
        entity = SEOEntity(
            entity_type=entity_type,
            slug=meta.get("slug"),
            title=title,
            excerpt=meta.get("description"),
            status=status,
        )

        try:
            payload = build_seo_payload(entity, route, config)
        except Exception as exc:
            log.error("build_seo_payload failed for %s: %s", md_file, exc)
            skipped += 1
            continue

        key = key_for(md_file, content_dir)
        out_path = data_dir / f"{key}.json"
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be pinned rather than left to the platform locale.
        out_path.write_text(
            json.dumps(payload, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        generated += 1

    if skipped:
        # Surface the tally so partially failed builds are visible in CI logs.
        log.warning("Skipped %d of %d content files", skipped, len(md_files))
    return generated
def main() -> None:
    """Command-line entry point.

    The config path comes from the ``SEOSLUG_CONFIG`` environment variable,
    falling back to ``seoslug-config.toml`` one directory above this script.
    Exits with status 1 when that file does not exist.
    """
    fallback_path = os.path.join(
        os.path.dirname(__file__), "..", "seoslug-config.toml"
    )
    config_path = os.environ.get("SEOSLUG_CONFIG", fallback_path)

    if os.path.exists(config_path):
        log.info("Starting SEO payload generation")
        written = generate_for_config(config_path)
        log.info("Done — generated %d SEO payloads", written)
        return

    log.error("Config not found: %s", config_path)
    sys.exit(1)


if __name__ == "__main__":
    main()
Key details about the script:
- Logging: Structured output with timestamps so build logs are debuggable. Set `SEOSLUG_LOG=debug` for verbose output if needed (note that the script itself configures logging at `INFO` via `logging.basicConfig` and does not read this variable; raise the level there if you want the script's own debug output).
- Error isolation: Each file is processed in a try/except so a single broken frontmatter block won't halt the entire build.
- Draft support: Hugo's `draft: true` frontmatter is mapped to seoslug's `draft` status, which generates `noindex,nofollow` robots directives and suppresses JSON-LD schema output.
- Key derivation: `content/studies/howistudy.md` → `studies_howistudy`. This must match the key derivation in your Hugo template exactly — both use the same transformation.
- Route derivation: `content/posts/_index.md` → `/posts/` (section listing), `content/posts/hello-world.md` → `/posts/hello-world/` (individual page). The script handles both `_index.md` (Hugo's branch bundle convention) and `index.md` (leaf bundle convention).
- Trailing newline: JSON files end with `\n` so they play nicely with POSIX tooling and `cat` in CI debugging.
3. Generated output¶
After running the script, each content file has a corresponding JSON file under data/seo/. Here is what data/seo/posts_hello-world.json looks like for a blog post titled "Hello World":
{
"canonical": "https://blog.example.com/posts/hello-world/",
"description": "My first blog post about getting started.",
"robots": "index,follow",
"og": {
"title": "Hello World",
"description": "My first blog post about getting started.",
"url": "https://blog.example.com/posts/hello-world/",
"image": "https://blog.example.com/images/default-og.png",
"type": "article"
},
"twitter": {
"card": "summary_large_image",
"title": "Hello World",
"description": "My first blog post about getting started.",
"image": "https://blog.example.com/images/default-og.png"
},
"schema_jsonld": {
"@context": "https://schema.org",
"@type": "Article",
"headline": "Hello World",
"description": "My first blog post about getting started.",
"url": "https://blog.example.com/posts/hello-world/",
"author": {
"@type": "Person",
"name": "Your Name"
},
"publisher": {
"@type": "Organization",
"name": "Dev Blog"
}
}
}
A section listing page (data/seo/posts__index.json — note the double underscore: `posts/_index.md` becomes `posts__index` once `/` is replaced with `_`) uses @type: WebPage instead of Article, and the homepage (data/seo/_index.json) uses @type: WebSite with a potentialAction for search. Every field is deterministic — same content always produces the same payload.
4. Dependencies¶
Create requirements.txt:
seoslug
python-frontmatter
Only two runtime dependencies. python-frontmatter parses Hugo's TOML/YAML/JSON frontmatter without needing Hugo itself.
5. Entity type mapping¶
The entity_type_for() function in the script determines which seoslug entity type each content file gets. This controls the JSON-LD schema type and the defaults for Open Graph / Twitter tags.
| Content path | Hugo role | entity_type | JSON-LD @type |
|---|---|---|---|
| `content/_index.md` | Homepage | `home` | `WebSite` |
| `content/blog/_index.md` | Section listing | `page` | `WebPage` |
| `content/blog/post.md` | Regular page | `post` | `Article` |
| `content/about.md` | Standalone page | `post` | `Article` |
| `content/blog/post/index.md` | Leaf bundle | `post` | `Article` |
The default mapping treats everything below section level as post (Article schema). If you have content sections that should use different entity types — for example, content/reviews/ mapped to review (Review schema) — override entity_type_for:
# Top-level content section → seoslug entity type. Sections not listed here
# fall back to "post".
CUSTOM_SECTION_TYPES = {
    "reviews": "review",
    "recipes": "recipe",
    "projects": "page",  # project overview → WebPage
}


def entity_type_for(md_path: Path, content_dir: Path) -> str:
    """Map a content file to a seoslug entity type, honouring section overrides.

    Args:
        md_path: Path of the Markdown file; must be located under
            ``content_dir``.
        content_dir: Root of the Hugo content tree.

    Returns:
        ``"home"`` for ``_index.md`` directly in ``content_dir``, ``"page"``
        for any other ``_index.md``, otherwise the ``CUSTOM_SECTION_TYPES``
        entry for the file's top-level section, defaulting to ``"post"``.

    Raises:
        ValueError: If ``md_path`` is not inside ``content_dir``.
    """
    if md_path.name == "_index.md":
        # Compare against the content root itself; comparing with
        # ``Path.root`` (a str) never matched, so "home" was unreachable.
        return "home" if md_path.parent == content_dir else "page"

    rel = md_path.relative_to(content_dir)
    # Files directly under the content root belong to no section.
    section = rel.parts[0] if len(rel.parts) > 1 else ""
    return CUSTOM_SECTION_TYPES.get(section, "post")
The entity type affects every field the payload generates:
- `post` → `Article` schema, `og:type: article`
- `page` → `WebPage` schema, `og:type: website`
- `home` → `WebSite` schema with `potentialAction` for search, `og:type: website`
- `review` → `Review` schema, `og:type: article`
Add the same overrides to the default mapping above and seoslug handles the rest — every JSON-LD field and meta tag is adapted to the chosen type automatically.
6. Hugo templates¶
Hugo automatically loads every JSON file under data/ into a nested map accessible via site.Data. A file at data/seo/studies_howistudy.json becomes site.Data.seo.studies_howistudy.
Because Hugo's template engine can't use bracket notation on maps, use the index function for dynamic key lookups.
Strategy A: Full SEO control (replace theme defaults)¶
Use this when your theme has minimal or no built-in SEO. The partial outputs everything: canonical, description, robots, Open Graph, Twitter Cards, and schema JSON-LD.
Create layouts/partials/extend_head.html:
{{- with .File }}
{{- $key := .Path | replaceRE `\.md$` "" | replaceRE `/` "_" }}
{{- with index (site.Data).seo $key }}
<link rel="canonical" href="{{ .canonical }}">
<meta name="description" content="{{ .description }}">
<meta name="robots" content="{{ .robots }}">
<meta property="og:title" content="{{ .og.title }}">
<meta property="og:description" content="{{ .og.description }}">
<meta property="og:url" content="{{ .og.url }}">
<meta property="og:image" content="{{ .og.image }}">
<meta property="og:type" content="{{ .og.type }}">
<meta name="twitter:card" content="{{ .twitter.card }}">
<meta name="twitter:title" content="{{ .twitter.title }}">
<meta name="twitter:description" content="{{ .twitter.description }}">
<meta name="twitter:image" content="{{ .twitter.image }}">
{{- with .schema_jsonld }}
<script type="application/ld+json">{{ . | jsonify | safeJS }}</script>
{{- end }}
{{- end }}
{{- end }}
Strategy B: Complement an existing SEO theme (no duplicate tags)¶
Use this when your theme already emits og:, twitter:, and basic meta tags (PaperMod, Anubis, etc.). Output only what the theme doesn't cover: typically canonical, description, robots, and schema_jsonld.
{{- with .File }}
{{- $key := .Path | replaceRE `\.md$` "" | replaceRE `/` "_" }}
{{- with index (site.Data).seo $key }}
<link rel="canonical" href="{{ .canonical }}">
<meta name="description" content="{{ .description }}">
<meta name="robots" content="{{ .robots }}">
{{- with .schema_jsonld }}
<script type="application/ld+json">{{ . | jsonify | safeJS }}</script>
{{- end }}
{{- end }}
{{- end }}
How the partial works:
- `{{- with .File }}` guards against virtual pages (taxonomies, pagination) that have no backing file — they simply skip the block.
- Key derivation mirrors the generation script: `.Path` gives the file path relative to the content directory (e.g. `studies/howistudy.md`); the pipe strips `.md` and replaces `/` with `_` to produce `studies_howistudy`.
- `index (site.Data).seo $key` performs the dynamic map lookup. Hugo built the map from all JSON files in `data/seo/` at startup.
- Every field is guaranteed present because seoslug filled all values with deterministic defaults during generation.
Multilingual sites¶
For Hugo sites with multiple languages, seoslug generates a separate payload per language (each language variant is a distinct content file with its own key). Use Hugo's .Translations to render <link rel="alternate" hreflang="..."> tags natively — this is more reliable than trying to derive alternates from the SEO payload.
{{- /* Emit one hreflang alternate per translation of the current page.
       hreflang targets must be fully-qualified URLs, so use .Permalink
       (absolute) rather than .RelPermalink (host-relative). */ -}}
{{- if .IsTranslated }}
{{- range .Translations }}
<link rel="alternate" hreflang="{{ .Language.LanguageCode }}" href="{{ .Permalink }}">
{{- end }}
{{- end }}
Add this to your theme's head.html or extend_head.html alongside the SEO tags. Hugo's .Translations is populated automatically from the content tree — no script changes needed.
Which strategy should I use?
Run curl https://yoursite.com/page/ | grep -E 'og:|twitter:' on a deployed page. If your theme already emits these tags, use Strategy B. Otherwise use Strategy A.
The PaperMod and Anubis themes both include built-in Open Graph and Twitter Card partials, making Strategy B the right choice.
7. Docker multi-stage build¶
A three-stage Dockerfile integrates seoslug into your Hugo build pipeline without adding Python to the final image.
# Stage 1 — SEO payload generation
FROM python:3.12-alpine AS seo
WORKDIR /src
# Install build dependencies first (layer caching). Only requirements.txt is
# copied here so that editing the SEO config does not invalidate the pip layer.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy only what the script needs
COPY seoslug-config.toml ./
COPY scripts/ scripts/
COPY content/ content/
# Generate deterministic SEO payloads (written to /src/data/seo)
RUN python scripts/generate_seo.py
# Stage 2 — Hugo build
FROM hugomods/hugo:exts AS builder
WORKDIR /src
# Copy the full project first
COPY . .
# Overlay the freshly generated SEO data last, so a stale data/seo/ directory
# in the local build context can never overwrite it
COPY --from=seo /src/data/ ./data/
# Build the static site
RUN hugo --minify --gc
# Stage 3 — nginx serving
FROM nginx:alpine
COPY --from=builder /src/public /usr/share/nginx/html
Build with:
Stage breakdown:
| Stage | Base image | Purpose |
|---|---|---|
| seo | python:3.12-alpine | Runs generate_seo.py, writes JSON to data/seo/ |
| builder | hugomods/hugo:exts | Copies generated data, runs hugo --minify --gc |
| nginx | nginx:alpine | Serves the static output |
The --gc (garbage collect) flag in the Hugo build removes unused content resources, keeping the output minimal.
8. CI/CD integration (GitHub Actions)¶
Here is a complete GitHub Actions workflow that builds and deploys with Docker:
# .github/workflows/deploy.yml
name: Deploy
on:
push:
branches: [main]
workflow_dispatch:
jobs:
build-and-deploy:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install seoslug and generate SEO
run: |
pip install -r requirements.txt
python scripts/generate_seo.py
- name: Set up Hugo
uses: peaceiris/actions-hugo@v3
with:
hugo-version: "0.136.0"
extended: true
- name: Build site
run: hugo --minify --gc
- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: ghcr.io/${{ github.repository }}:latest
This workflow: 1. Generates SEO payloads with Python 2. Builds the Hugo site 3. Packages it into a Docker image and pushes to GHCR
If you prefer a Docker-only workflow (no Python directly in CI), replace steps 2-4 with a single docker build using the multi-stage file from section 7.
9. Verification¶
After deployment, verify your SEO output is correct.
# Check canonical, description, and robots
curl -s https://yoursite.com/posts/hello-world/ | grep -E \
'(canonical|description|robots)' | head -5
# Check Open Graph and Twitter tags
curl -s https://yoursite.com/ | grep -E '(og:|twitter:)' | head -10
# Check JSON-LD schema
curl -s https://yoursite.com/ | grep -oP \
'<script type="application/ld\+json">\K.*?(?=</script>)' | python3 -m json.tool
# Validate all structured data
curl -s https://yoursite.com/posts/hello-world/ | \
grep -oP '<script type="application/ld\+json">\K.*?(?=</script>)' | \
python3 -c "
import sys, json
data = json.load(sys.stdin)
print('@type:', data.get('@type'))
print('headline:', data.get('headline'))
print('author:', data.get('author', {}).get('name'))
"
Also run a visual check with browser DevTools → Elements tab → search for og:, twitter:, and ld+json to confirm no duplicate tags.
10. Production checklist¶
Before going live:
- Every content file has a `title` in frontmatter (required for SEO payload generation)
- Every section `_index.md` has a `description` for deterministic og:description
- `data/seo/` is in `.gitignore` (generated at build time, not committed)
- The key derivation in `extend_head.html` matches the script exactly (same replacement logic)
- Your theme's `og:`/`twitter:` tags are accounted for (use Strategy A or B accordingly)
- Docker build runs successfully end-to-end
- Deployed pages return 200 with correct canonical, no duplicate meta tags
- JSON-LD validates at validator.schema.org
11. Edge cases¶
Leaf bundles¶
Hugo leaf bundles store content in index.md inside a directory (e.g. content/posts/my-post/index.md). The script handles these correctly:
- Route: `index.md` is stripped the same way as `_index.md` → `/posts/my-post/`
- Key: `posts/my-post/index.md` → `posts_my-post_index` (matches the Hugo template derivation exactly)
- Entity type: Non-`_index.md` files → `post` (Article schema)
No special handling needed — the script's existing logic for _index.md and index.md covers both branch and leaf bundles.
Headless bundles¶
Hugo supports headless bundles (headless: true in frontmatter) that are excluded from the rendered site. The script should skip these:
# In the file-processing loop, after loading frontmatter:
if meta.get("headless", False):
log.info("Skipping headless bundle: %s", md_file)
skipped += 1
continue
Add this check after the draft check in generate_for_config().
Paths with spaces¶
Hugo and the filesystem support spaces in content paths (e.g. content/posts/my first post.md). The script handles these transparently — Path objects, relative-to, and string replacements all preserve spaces. The generated route becomes /posts/my first post/ and the key becomes posts_my first post. Make sure your Hugo template's replaceRE regex doesn't assume space-free paths — it doesn't, since / is the only replaced character.
Missing slug¶
If a content file has no slug in frontmatter, SEOEntity.slug is None. seoslug falls back to deriving the slug from the route, which Hugo infers from the filename. This means a file content/posts/hello-world.md with no slug will still get the correct canonical URL /posts/hello-world/ — no action needed.
Hidden files¶
The script uses content_dir.rglob("*.md"), which includes files in directories starting with . (e.g. content/.drafts/). Filter these out explicitly:
md_files = sorted(
f for f in content_dir.rglob("*.md")
if not any(part.startswith(".") for part in f.relative_to(content_dir).parts)
)
This prevents SEO payloads from being generated for hidden draft directories, archive stashes, or editor temp folders.
Real-world example¶
This exact setup runs in production on egoblog, a Hugo blog deployed via GitHub Actions to GitHub Container Registry. The repo uses Strategy B (PaperMod handles og:/twitter:, seoslug provides canonical/description/robots/schema), a three-stage Dockerfile, and the full generation script from section 2.
Build time generation (generic)¶
Create a script that generates SEO payloads for all your content. This generic approach works with any SSG — the Hugo section above shows a production-ready version with frontmatter parsing and route derivation.
import json
from pathlib import Path
from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
# Site-wide settings shared by every generated payload.
config = SEOConfig(
    canonical_host="blog.example.com",
    public_base_url="https://blog.example.com",
    url_policy=URLPolicy(),
)

# Stand-in for your generator's content index: one dict per page.
posts = [
    {"slug": "hello-world", "title": "Hello World"},
    {"slug": "second-post", "title": "Second Post"},
]

# Payloads are written as <slug>.json under this directory.
output_dir = Path("_data/seo")
output_dir.mkdir(parents=True, exist_ok=True)

for post in posts:
    slug = post["slug"]
    entity = SEOEntity(entity_type="post", title=post["title"], status="published")
    payload = build_seo_payload(entity, f"/posts/{slug}", config)
    serialized = json.dumps(payload, indent=2)
    (output_dir / f"{slug}.json").write_text(serialized)
Pelican plugin¶
In Pelican, call seoslug during content generation.
from pelican import signals
from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
# Shared seoslug settings, built once when Pelican imports the plugin.
config = SEOConfig(
    canonical_host="blog.example.com",
    public_base_url="https://blog.example.com",
    url_policy=URLPolicy(),
)


def add_seo_metadata(content):
    """Attach a seoslug payload to *content* as ``content.seo_payload``.

    The entity is built from ``content.title`` and, when the attribute
    exists, ``content.summary``; the payload route is ``content.url``.
    """
    title = content.title
    summary = getattr(content, "summary", None)  # None when there is no summary
    entity = SEOEntity(
        entity_type="post",
        title=title,
        excerpt=summary,
        status="published",
    )
    content.seo_payload = build_seo_payload(entity, content.url, config)


def register():
    """Pelican plugin entry point: hook into ``content_object_init``."""
    init_signal = signals.content_object_init
    init_signal.connect(add_seo_metadata)
MkDocs plugin¶
In MkDocs, use the on_page_markdown event.
from seoslug import SEOConfig, URLPolicy, SEOEntity, build_seo_payload
# Named ``seo_config`` rather than ``config`` on purpose: the hook below
# receives MkDocs' own configuration in a parameter called ``config``, which
# would otherwise shadow this object inside the function.
seo_config = SEOConfig(
    canonical_host="docs.example.com",
    public_base_url="https://docs.example.com",
    url_policy=URLPolicy(),
)


def on_page_markdown(markdown, page, config, files):
    """MkDocs ``on_page_markdown`` hook: attach a seoslug payload to *page*.

    Args:
        markdown: Markdown source of the page (not modified).
        page: The MkDocs page object; receives a ``seo_payload`` attribute.
        config: MkDocs' global configuration (unused here).
        files: MkDocs' file collection (unused here).
    """
    entity = SEOEntity(
        entity_type="page",
        title=page.title,
        status="published",
    )
    # Pass the seoslug configuration, not the MkDocs ``config`` argument.
    page.seo_payload = build_seo_payload(
        entity, page.url, seo_config,
    )
Template usage¶
In your templates, read the payload from the pre-generated file.
Jinja (Pelican, MkDocs)¶
<head>
<title>{{ seo.title }}</title>
<meta name="description" content="{{ seo.description }}">
<link rel="canonical" href="{{ seo.canonical }}">
</head>
Go (Hugo)¶
{{- with .File }}
{{- $key := .Path | replaceRE `\.md$` "" | replaceRE `/` "_" }}
{{- with index (site.Data).seo $key }}
<link rel="canonical" href="{{ .canonical }}">
<meta name="description" content="{{ .description }}">
<meta name="robots" content="{{ .robots }}">
<script type="application/ld+json">{{ .schema_jsonld | jsonify | safeJS }}</script>
{{- end }}
{{- end }}
Hugo automatically loads all JSON files under data/ into site.Data.
Use the index function for dynamic key lookups since Hugo maps don't support bracket notation.
Wrap with with .File to skip virtual pages that have no backing content file.