2 Commits

Author SHA1 Message Date
Christian Gick
3c35ff84fc fix: remove duplicate structured-data partial call (IFK-15)
All checks were successful
Deploy Internet for Kids / Build & Push (push) Successful in 11s
Deploy Internet for Kids / Deploy (push) Successful in 5s
Deploy Internet for Kids / Health Check (push) Successful in 1s
Deploy Internet for Kids / Smoke Tests (push) Successful in 2s
Deploy Internet for Kids / IndexNow Ping (push) Successful in 7s
Deploy Internet for Kids / Promote to Latest (push) Successful in 1s
Deploy Internet for Kids / Rollback (push) Has been skipped
Deploy Internet for Kids / Audit (push) Successful in 1s
structured-data.html was called both in extend-head.html (line 2) and
baseof.html (line 56), resulting in duplicate JSON-LD blocks:
- Home: 2x WebSite schema
- Articles: 2x BlogPosting + 2x BreadcrumbList

Remove the redundant call in baseof.html. Article pages now emit 1
BlogPosting + 1 BreadcrumbList as intended.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 20:54:23 +03:00
Christian Gick
03fcececfc feat: add outgoing link checker script (IFK-14)
All checks were successful
Deploy Internet for Kids / Build & Push (push) Successful in 12s
Deploy Internet for Kids / Deploy (push) Successful in 5s
Deploy Internet for Kids / Health Check (push) Successful in 1s
Deploy Internet for Kids / Smoke Tests (push) Successful in 2s
Deploy Internet for Kids / IndexNow Ping (push) Successful in 8s
Deploy Internet for Kids / Promote to Latest (push) Successful in 1s
Deploy Internet for Kids / Rollback (push) Has been skipped
Deploy Internet for Kids / Audit (push) Successful in 2s
Verify external source URLs in content/*.md are reachable. HEAD request
with GET fallback, treats known bot-blocker 403s (hhs.gov, jamanetwork,
consilium.europa.eu etc.) as OK.

Usage:
  scripts/check-links.sh              # all content files
  scripts/check-links.sh --fast       # stop at first failure
  scripts/check-links.sh --verbose

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 20:45:14 +03:00
2 changed files with 153 additions and 1 deletions

View File

@@ -53,6 +53,5 @@
</footer> </footer>
{{ partial "rybbit.html" . }} {{ partial "rybbit.html" . }}
{{ partial "structured-data.html" . }}
</body> </body>
</html> </html>

153
scripts/check-links.sh Executable file
View File

@@ -0,0 +1,153 @@
#!/usr/bin/env bash
# check-links.sh — Verify all external URLs in content/*.md are reachable.
#
# Extracts https?:// URLs from content/*.md, sends a HEAD request (fallback to GET),
# and reports 404s / connection errors. Known bot-blockers (403 responses from
# hhs.gov, jamanetwork.com etc.) are treated as OK.
#
# Exit codes:
# 0 — all links OK or only bot-blocked
# 1 — one or more broken links found
# 2 — usage error
#
# Usage:
# scripts/check-links.sh # check all content files
# scripts/check-links.sh --fast # stop at first failure
# scripts/check-links.sh --verbose # show every URL being checked
# scripts/check-links.sh path/to.md # check a specific file
# Strict mode: abort on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Resolve the project root relative to this script so the checker can be
# invoked from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

FAST=0    # 1 → stop at the first broken link (--fast)
VERBOSE=0 # 1 → also print URLs that passed (--verbose / -v)
FILES=()  # files to scan; left empty means "every .md under content/"

#######################################
# Parse command-line arguments into FAST, VERBOSE and FILES.
# Globals:   FAST, VERBOSE, FILES (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for --help; error messages on stderr
# Exits:     0 after --help; 2 on an unknown option or on a positional
#            argument that is not an existing regular file
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --fast) FAST=1 ;;
      --verbose | -v) VERBOSE=1 ;;
      --help | -h)
        # Print the header comment with the leading "# " removed.
        # NOTE(review): assumes the header occupies lines 2-18 of this file.
        sed -n '2,18p' "$0" | sed 's/^# \{0,1\}//'
        exit 0
        ;;
      -*)
        echo "Unknown option: $arg" >&2
        exit 2
        ;;
      *)
        # Reject paths that are not regular files: a mistyped path would
        # otherwise contribute zero URLs and the run would report success.
        if [[ ! -f "$arg" ]]; then
          echo "Not a file: $arg" >&2
          exit 2
        fi
        FILES+=("$arg")
        ;;
    esac
  done
}

parse_args "$@"

# Default: all .md files under content/
if [[ "${#FILES[@]}" -eq 0 ]]; then
  mapfile -t FILES < <(find "$PROJECT_ROOT/content" -type f -name '*.md')
fi
# Hosts that answer automated requests with 403 although they are valid
# sources. One regex fragment per line (dots escaped) so additions and removals
# show up as single-line diffs.
# Keep this list short — anything that doesn't load in a browser should fail.
ALLOW_403_HOST_PATTERNS=(
  'hhs\.gov'
  'jamanetwork\.com'
  'commission\.europa\.eu'
  'digital-strategy\.ec\.europa\.eu'
  'pubmed\.ncbi\.nlm\.nih\.gov'
  'who\.int'
  'oecd\.org'
  'unesco\.org'
  'unicef\.org'
  'amnesty\.org'
  'linkedin\.com'
  'sciencedirect\.com'
  'nature\.com'
  'tandfonline\.com'
  'springer\.com'
  'wiley\.com'
  'bmj\.com'
  'nytimes\.com'
  'ft\.com'
  'wsj\.com'
  'spiegel\.de'
  'zeit\.de'
  'lemonde\.fr'
  'lefigaro\.fr'
  'cnn\.com'
  'worldhappiness\.report'
  'danish-presidency\.consilium\.europa\.eu'
  'consilium\.europa\.eu'
  'europarl\.europa\.eu'
  'ec\.europa\.eu'
)
# Anchored host regex: an optional "www." prefix followed by exactly one of
# the fragments above, joined with "|" (the subshell keeps IFS local).
ALLOW_403='^(www\.)?('"$(IFS='|'; printf '%s' "${ALLOW_403_HOST_PATTERNS[*]}")"')$'
#######################################
# Print the unique http(s) URLs found in a file, one per line, sorted.
# Trailing sentence punctuation and closing brackets are stripped so that
# "see https://example.com/page." yields https://example.com/page.
# Arguments: $1 - path of the file to scan
# Outputs:   URLs on stdout; nothing when the file contains none
# Returns:   0 when the file was scanned, including the "no URLs" case;
#            with pipefail in effect, non-zero when grep reports an error
#######################################
extract_urls() {
  local file="$1"
  # grep exits 1 for "no match", which is a normal outcome here; only a real
  # grep error (status 2, e.g. unreadable file) should fail the pipeline.
  #
  # In the sed bracket expression "]" must come first to be literal: a
  # backslash does not escape inside POSIX brackets, so "\]" would end the set
  # early and the pattern would never match a plain trailing "." or ",".
  { grep -Eo 'https?://[A-Za-z0-9._~:/?#@!$&+,;=%-]+' -- "$file" || [ "$?" -eq 1 ]; } \
    | sed -E 's/[],.;:!?)]+$//' \
    | sort -u
}
#######################################
# Fetch a URL and print the HTTP status code of the final response.
# A HEAD request is tried first; if it fails outright or is answered with
# 403/405, the URL is fetched again with GET.
# Arguments: $1 - URL to check
# Outputs:   the HTTP status code on stdout, or "000" if the fetch failed
#######################################
check_url() {
  local url="$1"
  local status
  # Options shared by the HEAD attempt and the GET fallback.
  local -a curl_opts=(
    -sS -o /dev/null -w "%{http_code}"
    --max-time 10
    --connect-timeout 5
    --retry 1
    --user-agent "Mozilla/5.0 (compatible; IFK-LinkChecker/1.0; +https://internetforkids.org)"
    -L
  )

  # curl already writes "000" through -w when a transfer fails. Echoing a
  # fallback inside the same command substitution would produce "000000",
  # which defeats the comparison below, so assign the fallback instead.
  status=$(curl "${curl_opts[@]}" -I "$url" 2>/dev/null) || status="000"

  # Many servers reject HEAD → retry with GET
  if [[ "$status" == "000" || "$status" == "405" || "$status" == "403" ]]; then
    status=$(curl "${curl_opts[@]}" "$url" 2>/dev/null) || status="000"
  fi
  echo "$status"
}
#######################################
# Classify a fetch result as "ok", "ok-blocked" or "broken".
# Globals:   ALLOW_403 (read) - anchored regex of hosts whose 403 is tolerated
# Arguments: $1 - status code as printed by check_url
#            $2 - the URL that was checked
# Outputs:   exactly one of: ok, ok-blocked, broken
#######################################
classify() {
  local status="$1" url="$2"
  local host
  case "$status" in
    2[0-9][0-9] | 3[0-9][0-9])
      echo "ok"
      ;;
    403)
      # Reduce the URL to a bare lowercase hostname before matching, so that
      # an explicit port, userinfo, a query attached directly to the host, or
      # upper-case letters do not defeat the allowlist.
      host="${url#*://}"     # drop the scheme
      host="${host%%[/?#]*}" # drop path, query and fragment
      host="${host##*@}"     # drop userinfo
      host="${host%%:*}"     # drop the port
      host="${host,,}"       # hostnames are case-insensitive
      if [[ "$host" =~ $ALLOW_403 ]]; then
        echo "ok-blocked"
      else
        echo "broken"
      fi
      ;;
    *)
      # 404, 410, 5xx, "000" (fetch failed) and any unrecognised status.
      echo "broken"
      ;;
  esac
}
# Collect URL → files mapping, so each URL is fetched once and a failure can
# name every file that references it.
declare -A URL_FILES=()
# The ${FILES[@]+...} form keeps an empty FILES array from tripping `set -u`
# on bash releases older than 4.4.
for file in ${FILES[@]+"${FILES[@]}"}; do
  # Path relative to the project root for readable reports. The root is quoted
  # inside the expansion so it is matched literally rather than as a glob.
  rel="${file#"$PROJECT_ROOT"/}"
  while IFS= read -r url; do
    [[ -z "$url" ]] && continue
    if [[ -z "${URL_FILES[$url]+x}" ]]; then
      URL_FILES[$url]="$rel"
    else
      URL_FILES[$url]="${URL_FILES[$url]}, $rel"
    fi
  done < <(extract_urls "$file")
done

TOTAL=${#URL_FILES[@]}
echo "Checking $TOTAL unique external URLs across ${#FILES[@]} files..."
if [[ "$TOTAL" -eq 0 ]]; then
  echo "No external URLs found."
  exit 0
fi

# Associative-array key order is unspecified. Sort the URLs so reports are
# reproducible and --fast aborts on the same URL from run to run.
mapfile -t SORTED_URLS < <(printf '%s\n' "${!URL_FILES[@]}" | LC_ALL=C sort)

BROKEN=0
OK=0
BLOCKED=0
for url in "${SORTED_URLS[@]}"; do
  status=$(check_url "$url")
  result=$(classify "$status" "$url")
  case "$result" in
    ok)
      OK=$((OK + 1))
      if [[ "$VERBOSE" -eq 1 ]]; then
        printf " OK %s %s\n" "$status" "$url"
      fi
      ;;
    ok-blocked)
      BLOCKED=$((BLOCKED + 1))
      if [[ "$VERBOSE" -eq 1 ]]; then
        printf " 403* %s (bot-blocked, allowlisted)\n" "$url"
      fi
      ;;
    broken)
      BROKEN=$((BROKEN + 1))
      # Failures are always printed, together with the referencing files.
      printf " FAIL %s %s\n in: %s\n" "$status" "$url" "${URL_FILES[$url]}"
      if [[ "$FAST" -eq 1 ]]; then
        echo ""
        echo "Aborting early (--fast). $BROKEN broken, $OK ok, $BLOCKED allowlisted."
        exit 1
      fi
      ;;
  esac
done

echo ""
echo "Summary: $OK OK, $BLOCKED allowlisted (403), $BROKEN broken"
# Exit 1 when any link is broken so callers can test the exit status.
if [[ "$BROKEN" -gt 0 ]]; then
  exit 1
fi