robot-u-site/live_prototype.py

from __future__ import annotations

import asyncio
import re
from typing import Any
from urllib.parse import unquote, urlparse

from calendar_feeds import CalendarFeed, CalendarFeedError, fetch_calendar_feed
from forgejo_client import ForgejoClient, ForgejoClientError
from settings import Settings


async def build_live_prototype_payload(
    settings: Settings,
    *,
    forgejo_token: str | None = None,
    auth_source: str = "none",
    session_user: dict[str, Any] | None = None,
) -> dict[str, object]:
    """Assemble the full prototype page payload from live Forgejo and calendar data.

    Args:
        settings: Application settings; supplies the Forgejo base URL, the
            fallback token, calendar feed URLs and the various limits read below.
        forgejo_token: Token to use for Forgejo requests. When falsy,
            ``settings.forgejo_token`` is used instead.
        auth_source: Where the token came from. Only ``"authorization"`` and
            ``"session"`` are treated as user-scoped tokens.
        session_user: User record from the session, if any. When present it
            takes precedence over the user fetched from the API.

    Returns:
        A dict with the keys ``hero``, ``auth``, ``source_of_truth``,
        ``discussion_settings``, ``featured_courses``, ``recent_posts``,
        ``upcoming_events``, ``recent_discussions`` and ``implementation_notes``.
        If repository discovery fails, the reduced payload from
        ``_empty_payload`` is returned instead (same keys, empty content lists).

    ``ForgejoClientError`` raised by the OIDC lookup or the repository search is
    converted into an entry in ``implementation_notes`` rather than propagated.
    """
    warnings: list[str] = []
    access_token = forgejo_token or settings.forgejo_token
    # A token only counts as "user" credentials when the caller says it came
    # from an Authorization header or a session; the server fallback does not.
    has_user_token = bool(access_token) and auth_source in {"authorization", "session"}
    source_cards = [
        {
            "title": "Forgejo base URL",
            "description": settings.forgejo_base_url,
        },
        {
            "title": "Access mode",
            "description": _access_mode_description(access_token, auth_source),
        },
    ]

    # Calendar feeds are fetched before the Forgejo client is opened; failures
    # are appended to `warnings` by the loader.
    calendar_feeds = await _load_calendar_feeds(settings, warnings)
    if settings.calendar_feed_urls:
        # NOTE(review): the count is the number of feeds that loaded
        # successfully, although the label says "configured".
        source_cards.append(
            {
                "title": "Calendar feeds",
                "description": f"{len(calendar_feeds)} configured feed(s)",
            },
        )

    async with ForgejoClient(settings, forgejo_token=access_token) as client:
        # OIDC metadata is informational only: on failure we record a warning
        # and show "Unavailable" as the issuer.
        try:
            oidc = await client.fetch_openid_configuration()
        except ForgejoClientError as error:
            warnings.append(str(error))
            oidc = {}

        issuer = oidc.get("issuer", "Unavailable")
        source_cards.append(
            {
                "title": "OIDC issuer",
                "description": str(issuer),
            },
        )

        # Repository discovery is required: without it there is nothing to
        # render, so bail out early with the reduced payload.
        try:
            repos = await client.search_repositories()
        except ForgejoClientError as error:
            warnings.append(str(error))
            source_cards.append(
                {
                    "title": "Discovery state",
                    "description": "Forgejo connection exists, but live repo discovery failed.",
                },
            )
            return _empty_payload(
                source_cards=source_cards,
                warnings=warnings,
                auth=_auth_payload(
                    session_user, _display_auth_source(auth_source, session_user), settings
                ),
                hero_summary=(
                    "The backend reached aksal.cloud, but the configured token could not complete "
                    "the public repo discovery flow."
                ),
            )

        # Make sure the configured general-discussion repo is part of the set
        # even when the search did not return it.
        repos = await _with_configured_discussion_repo(client, repos, settings, warnings)
        current_user = await _current_user_for_auth_source(client, has_user_token, warnings)
        # Only public, non-fork repositories are summarized and scanned for issues.
        public_repos = [repo for repo in repos if not repo.get("fork") and not repo.get("private")]
        repo_summaries = await asyncio.gather(
            *[_summarize_repo(client, repo) for repo in public_repos],
        )
        # `_summarize_repo` returns None for repos without lessons/ or blogs/.
        content_repos = [summary for summary in repo_summaries if summary is not None]
        course_repos = [summary for summary in content_repos if summary["lesson_count"] > 0]
        post_repos = [summary for summary in content_repos if summary["blog_count"] > 0]
        # Posts from every repo are merged and ordered newest first by the
        # string form of `updated_at`.
        blog_posts = sorted(
            [post for summary in post_repos for post in summary["blog_posts"]],
            key=lambda post: str(post.get("updated_at", "")),
            reverse=True,
        )
        public_issues = await _recent_public_issues(
            client,
            public_repos,
            settings.forgejo_recent_issue_limit,
        )

        if current_user is not None:
            source_cards.append(
                {
                    "title": "Signed-in API identity",
                    "description": str(current_user.get("login", "Unknown user")),
                },
            )
        source_cards.append(
            {
                "title": "Discovery state",
                "description": (
                    f"Detected {len(course_repos)} course repos, {len(post_repos)} post repos, "
                    f"and {len(public_issues)} recent public issues."
                ),
            },
        )
        # The session user wins over the identity reported by the API.
        auth_user = session_user or current_user

        return {
            "hero": {
                "eyebrow": "Live Forgejo integration",
                "title": "Robot U is reading from your aksal.cloud Forgejo instance.",
                "summary": (
                    "This prototype now uses the real Forgejo base URL, OIDC metadata, visible repos, "
                    "and recent issues available to the active token."
                ),
                "highlights": [
                    "Repo discovery filters to public, non-fork repositories only",
                    "Course repos are detected from /lessons/, post repos from /blogs/",
                    "Recent discussions are loaded from live Forgejo issues",
                ],
            },
            # Without a user token `current_user` is always None, so both
            # branches resolve to the session user in that case.
            "auth": _auth_payload(
                auth_user if has_user_token else session_user,
                _display_auth_source(auth_source, session_user),
                settings,
            ),
            "source_of_truth": source_cards,
            "discussion_settings": _discussion_settings(settings),
            "featured_courses": [_course_card(summary) for summary in course_repos],
            "recent_posts": [_post_card(post) for post in blog_posts],
            "upcoming_events": _event_cards(calendar_feeds, settings.calendar_event_limit),
            # One comment fetch per issue, run concurrently.
            "recent_discussions": await asyncio.gather(
                *[_discussion_card(client, issue) for issue in public_issues],
            ),
            # Warnings collected along the way are surfaced after the fixed notes.
            "implementation_notes": [
                "Live repo discovery is now driven by the Forgejo API instead of mock content.",
                "Issues shown here are loaded only from public Forgejo repositories.",
                *warnings,
            ],
        }


def _access_mode_description(access_token: str | None, auth_source: str) -> str:
    """Describe, in one sentence, how the backend is reading from Forgejo.

    A token that came from an ``authorization`` header or a ``session`` is
    reported as user authentication; any other token (or the ``server`` source)
    is reported as the server-side token; otherwise access is anonymous.
    """
    token_present = bool(access_token)
    user_scoped = auth_source in {"authorization", "session"}

    if token_present and user_scoped:
        message = f"Authenticated through {auth_source} token."
    elif token_present or auth_source == "server":
        message = "Reading public content through the server-side Forgejo token."
    else:
        message = "Reading public content anonymously."
    return message


async def _current_user_for_auth_source(
    client: ForgejoClient,
    has_user_token: bool,
    warnings: list[str],
) -> dict[str, Any] | None:
    """Fetch the Forgejo user behind the active token, if it is a user token.

    Returns ``None`` without calling the API when ``has_user_token`` is false.
    A ``ForgejoClientError`` from the lookup is appended to ``warnings`` as
    text and also results in ``None``.
    """
    current_user: dict[str, Any] | None = None
    if has_user_token:
        try:
            current_user = await client.fetch_current_user()
        except ForgejoClientError as error:
            warnings.append(str(error))
    return current_user


async def _with_configured_discussion_repo(
    client: ForgejoClient,
    repos: list[dict[str, Any]],
    settings: Settings,
    warnings: list[str],
) -> list[dict[str, Any]]:
    """Ensure the configured general-discussion repo is part of ``repos``.

    Returns ``repos`` unchanged when no valid ``owner/repo`` is configured,
    when a repo with the same ``full_name`` (case-insensitive) is already
    listed, or when fetching the repo fails (a warning is recorded in that
    case). Otherwise returns a new list with the fetched repo appended.
    """
    parsed = _configured_owner_repo(settings.forgejo_general_discussion_repo)
    if parsed is None:
        return repos

    owner, repo = parsed
    wanted = f"{owner}/{repo}".lower()
    for candidate in repos:
        if str(candidate.get("full_name", "")).lower() == wanted:
            # Already discovered; nothing to add.
            return repos

    try:
        extra_repo = await client.fetch_repository(owner, repo)
    except ForgejoClientError as error:
        warnings.append(f"General discussion repo could not be loaded: {error}")
        return repos

    combined = list(repos)
    combined.append(extra_repo)
    return combined


def _configured_owner_repo(value: str | None) -> tuple[str, str] | None:
    """Parse an ``owner/repo`` setting into its two parts.

    Surrounding whitespace is ignored. Returns ``None`` for an empty value or
    anything that is not exactly two non-empty segments separated by one ``/``.
    """
    if not value:
        return None

    segments = value.strip().split("/")
    if len(segments) != 2:
        return None

    owner, repo = segments
    if not (owner and repo):
        return None
    return owner, repo


def _discussion_settings(settings: Settings) -> dict[str, object]:
    """Build the discussion-settings payload from the application settings.

    General discussion counts as configured when
    ``settings.forgejo_general_discussion_repo`` parses as ``owner/repo``.
    """
    owner_repo = _configured_owner_repo(settings.forgejo_general_discussion_repo)
    is_configured = owner_repo is not None
    return _discussion_settings_from_configured(is_configured)


def _discussion_settings_from_configured(general_discussion_configured: bool) -> dict[str, object]:
    """Wrap the "general discussion configured" flag in its payload dict."""
    payload: dict[str, object] = {}
    payload["general_discussion_configured"] = general_discussion_configured
    return payload


async def _summarize_repo(
    client: ForgejoClient,
    repo: dict[str, Any],
) -> dict[str, Any] | None:
    """Summarize the course and blog content found in one repository.

    A repository is considered content when its root contains a ``lessons``
    and/or ``blogs`` directory. Lessons are expected at
    ``lessons/<chapter>/<lesson>/`` and posts at ``blogs/<post>/``.

    Args:
        client: Forgejo client used for directory listings.
        repo: Repository record as returned by the Forgejo API.

    Returns:
        A summary dict (``name``, ``owner``, ``full_name``, ``html_url``,
        ``description``, ``lesson_count``, ``chapter_count``, ``blog_count``,
        ``blog_posts``, ``updated_at``, ``course_outline``), or ``None`` when
        the repo has no usable owner/name, its root cannot be listed, or it
        has neither content directory.

    A ``ForgejoClientError`` while listing ``lessons``, a chapter, or ``blogs``
    is treated as an empty directory. Callers run this function for many
    repositories inside one ``asyncio.gather``, so letting such an error escape
    would discard the results for every other repository as well.
    """
    owner_login = _repo_owner_login(repo)
    repo_name = repo.get("name")
    if not isinstance(owner_login, str) or not isinstance(repo_name, str):
        return None
    default_branch = str(repo.get("default_branch") or "main")
    repo_html_url = str(repo.get("html_url", ""))

    try:
        root_entries = await client.list_directory(owner_login, repo_name)
    except ForgejoClientError:
        return None

    entry_names = {
        entry.get("name")
        for entry in root_entries
        if entry.get("type") == "dir" and isinstance(entry.get("name"), str)
    }
    has_lessons = "lessons" in entry_names
    has_blogs = "blogs" in entry_names
    if not has_lessons and not has_blogs:
        return None

    async def list_or_empty(path: str) -> list[dict[str, Any]]:
        # Best effort: a failed nested listing degrades to "no entries",
        # the same way the root listing and the per-lesson/per-post helpers
        # degrade instead of raising.
        try:
            return await client.list_directory(owner_login, repo_name, path)
        except ForgejoClientError:
            return []

    chapter_count = 0
    lesson_count = 0
    course_outline: list[dict[str, object]] = []
    if has_lessons:
        chapter_dirs = _sorted_dir_entries(await list_or_empty("lessons"))
        chapter_count = len(chapter_dirs)
        # One listing per chapter, fetched concurrently; order matches chapter_dirs.
        chapter_entry_lists = await asyncio.gather(
            *[list_or_empty(f"lessons/{entry['name']}") for entry in chapter_dirs],
        )
        for chapter_dir, chapter_entries in zip(chapter_dirs, chapter_entry_lists, strict=True):
            chapter_name = str(chapter_dir.get("name", ""))
            lesson_dirs = _sorted_dir_entries(chapter_entries)
            # Count exactly the lessons that appear in the outline so the two
            # numbers can never disagree.
            lesson_count += len(lesson_dirs)
            lesson_summaries = await asyncio.gather(
                *[
                    _summarize_lesson(
                        client,
                        owner_login,
                        repo_name,
                        default_branch,
                        repo_html_url,
                        chapter_name,
                        str(lesson_dir.get("name", "")),
                    )
                    for lesson_dir in lesson_dirs
                ],
            )
            course_outline.append(
                {
                    "slug": chapter_name,
                    "title": _display_name(chapter_name),
                    "lessons": lesson_summaries,
                },
            )

    blog_count = 0
    blog_posts: list[dict[str, object]] = []
    if has_blogs:
        blog_dirs = _sorted_dir_entries(await list_or_empty("blogs"))
        blog_count = len(blog_dirs)
        blog_posts = await asyncio.gather(
            *[
                _summarize_blog_post(
                    client,
                    owner_login,
                    repo_name,
                    str(repo.get("full_name", f"{owner_login}/{repo_name}")),
                    str(repo.get("description") or ""),
                    str(repo.get("updated_at", "")),
                    default_branch,
                    repo_html_url,
                    str(blog_dir.get("name", "")),
                )
                for blog_dir in blog_dirs
            ],
        )

    return {
        "name": repo_name,
        "owner": owner_login,
        "full_name": repo.get("full_name", f"{owner_login}/{repo_name}"),
        "html_url": repo.get("html_url", ""),
        "description": repo.get("description") or "No repository description yet.",
        "lesson_count": lesson_count,
        "chapter_count": chapter_count,
        "blog_count": blog_count,
        "blog_posts": blog_posts,
        "updated_at": repo.get("updated_at", ""),
        "course_outline": course_outline,
    }


def _course_card(summary: dict[str, Any]) -> dict[str, object]:
    """Convert a repository summary into the course card shown on the page.

    Every summary field read here is required; a missing one raises ``KeyError``.
    """
    # (card key, summary key) pairs, in the order the card exposes them.
    field_sources = (
        ("title", "name"),
        ("owner", "owner"),
        ("name", "name"),
        ("repo", "full_name"),
        ("html_url", "html_url"),
        ("lessons", "lesson_count"),
        ("chapters", "chapter_count"),
        ("summary", "description"),
    )
    card: dict[str, object] = {
        card_key: summary[summary_key] for card_key, summary_key in field_sources
    }
    card["status"] = "Live course repo"
    card["outline"] = summary["course_outline"]
    card["updated_at"] = summary["updated_at"]
    return card


def _post_card(post: dict[str, Any]) -> dict[str, object]:
    """Convert a blog-post summary into the post card shown on the page.

    Copies the listed fields verbatim and tags the card with a fixed ``kind``.
    Every field read here is required; a missing one raises ``KeyError``.
    """
    leading_fields = ("title", "owner", "name", "repo", "slug")
    trailing_fields = (
        "summary",
        "path",
        "file_path",
        "html_url",
        "raw_base_url",
        "assets",
        "body",
        "updated_at",
    )

    card: dict[str, object] = {field: post[field] for field in leading_fields}
    card["kind"] = "Blog post"
    for field in trailing_fields:
        card[field] = post[field]
    return card


async def _recent_public_issues(
    client: ForgejoClient,
    repos: list[dict[str, Any]],
    limit: int,
) -> list[dict[str, Any]]:
    """Collect the most recently updated issues across ``repos``.

    Issues are fetched per repository concurrently, merged, ordered newest
    first by the string form of ``updated_at``, and cut to ``limit`` entries.
    """
    pending = [_repo_issues(client, repo, limit) for repo in repos]
    per_repo_issues = await asyncio.gather(*pending)

    merged: list[dict[str, Any]] = []
    for repo_issues in per_repo_issues:
        merged.extend(repo_issues)

    merged.sort(key=lambda issue: str(issue.get("updated_at", "")), reverse=True)
    return merged[:limit]


async def _repo_issues(
    client: ForgejoClient,
    repo: dict[str, Any],
    limit: int,
) -> list[dict[str, Any]]:
    """List up to ``limit`` issues for one repository, tagged with that repo.

    Returns an empty list when the repo lacks a usable owner or name, or when
    the Forgejo request fails.
    """
    owner_login = _repo_owner_login(repo)
    repo_name = repo.get("name")
    if not (isinstance(owner_login, str) and isinstance(repo_name, str)):
        return []

    try:
        raw_issues = await client.list_repo_issues(owner_login, repo_name, limit=limit)
    except ForgejoClientError:
        return []

    tagged: list[dict[str, Any]] = []
    for raw_issue in raw_issues:
        tagged.append(_with_repository(raw_issue, repo, owner_login, repo_name))
    return tagged


def _with_repository(
    issue: dict[str, Any],
    repo: dict[str, Any],
    owner_login: str,
    repo_name: str,
) -> dict[str, Any]:
    """Return a copy of ``issue`` carrying a compact ``repository`` record.

    The input issue is not modified. ``full_name`` falls back to
    ``owner_login/repo_name`` and ``private`` is always reported as ``False``.
    """
    default_full_name = f"{owner_login}/{repo_name}"
    repository = {
        "owner": owner_login,
        "name": repo_name,
        "full_name": repo.get("full_name", default_full_name),
        "private": False,
    }
    return {**issue, "repository": repository}


def _repo_owner_login(repo: dict[str, Any]) -> str | None:
    """Extract the owner login from a repository record.

    Accepts either an owner object with a string ``login`` or a bare string
    owner; anything else yields ``None``.
    """
    owner = repo.get("owner", {})
    if isinstance(owner, str):
        return owner
    if isinstance(owner, dict):
        login = owner.get("login")
        if isinstance(login, str):
            return login
    return None


def _event_cards(calendar_feeds: list[CalendarFeed], limit: int) -> list[dict[str, object]]:
    """Build event cards for the earliest ``limit`` events across all feeds.

    Events from every feed are merged and ordered by ``starts_at`` ascending
    before the cut is applied.
    """
    all_events = []
    for feed in calendar_feeds:
        all_events.extend(feed.events)
    all_events.sort(key=lambda event: event.starts_at)

    cards: list[dict[str, object]] = []
    for event in all_events[:limit]:
        cards.append(
            {
                "title": event.title,
                "when": _format_event_datetime(event.starts_at),
                "source": event.source,
                "mode": event.mode,
            },
        )
    return cards


async def _discussion_card(client: ForgejoClient, issue: dict[str, Any]) -> dict[str, object]:
    """Build a discussion card for ``issue``, including its comments.

    Comments are fetched only when the issue carries a ``repository`` record
    with string ``owner`` and ``name`` and a positive issue number. A
    ``ForgejoClientError`` while fetching comments yields a card without replies.
    """
    repository = issue.get("repository") or {}
    owner = repository.get("owner", "")
    repo_name = repository.get("name")
    # `or 0` tolerates an explicit null number, the same way
    # discussion_card_from_issue parses it; a bare int(None) would raise.
    issue_number = int(issue.get("number", 0) or 0)

    comment_items: list[dict[str, object]] = []
    if isinstance(owner, str) and isinstance(repo_name, str) and issue_number > 0:
        try:
            comments = await client.list_issue_comments(owner, repo_name, issue_number)
        except ForgejoClientError:
            comments = []
        comment_items = [_discussion_reply(comment) for comment in comments]

    return discussion_card_from_issue(issue, comments=comment_items)


def discussion_card_from_issue(
    issue: dict[str, Any],
    *,
    comments: list[dict[str, object]] | None = None,
) -> dict[str, object]:
    """Convert a Forgejo issue record into a discussion card.

    Args:
        issue: Issue record; may carry a ``repository`` entry with ``full_name``.
        comments: Pre-built reply payloads to attach; defaults to none.

    Returns:
        The card dict. ``links`` holds the site links detected in the issue
        body, and ``context`` is ``"Linked discussion"`` when any were found.

    Missing or null ``id``, ``number``, ``comments``, ``labels``, ``user`` and
    ``body`` fields fall back to neutral defaults instead of raising.
    """
    repository = issue.get("repository") or {}
    full_name = repository.get("full_name", "Unknown repo")
    issue_author = issue.get("user") or {}
    issue_number = int(issue.get("number", 0) or 0)
    # `or []` covers an explicit null label list; only named labels are kept.
    labels = [
        label["name"]
        for label in issue.get("labels") or []
        if isinstance(label, dict) and isinstance(label.get("name"), str)
    ]
    body = str(issue.get("body", "") or "").strip()
    # Links are detected on the real body, before the placeholder text is applied.
    links = discussion_links_from_text(body)
    if not body:
        body = "No issue description yet. Right now the conversation starts in the replies."

    return {
        "id": int(issue.get("id", 0) or 0),
        "title": issue.get("title", "Untitled issue"),
        "repo": full_name,
        "replies": int(issue.get("comments", 0) or 0),
        "context": "Linked discussion" if links else "Live Forgejo issue",
        "author": issue_author.get("login", "Unknown author"),
        "author_avatar_url": issue_author.get("avatar_url", ""),
        "state": issue.get("state", "open"),
        "body": body,
        "number": issue_number,
        "updated_at": issue.get("updated_at", ""),
        "html_url": issue.get("html_url", ""),
        "labels": labels,
        "comments": comments or [],
        "links": links,
    }


def discussion_links_from_text(text: str) -> list[dict[str, object]]:
    """Find links to site posts and lessons inside free-form text.

    Three kinds of reference are recognized, in this order:

    1. ``/posts/<owner>/<repo>/<slug>`` paths, optionally inside a full URL;
    2. ``/courses/<owner>/<repo>/lessons/<chapter>/<lesson>`` paths, likewise;
    3. absolute URLs that point at a ``blogs/`` or ``lessons/`` folder in a
       Forgejo source view.

    Path segments are percent-decoded and trailing ``.``/``,`` are dropped
    from the last segment. Duplicate targets are reported once.
    """
    post_pattern = r"(?:https?://[^\s)]+)?(/posts/([^/\s)]+)/([^/\s)]+)/([^/\s)#?]+))"
    lesson_pattern = (
        r"(?:https?://[^\s)]+)?"
        r"(/courses/([^/\s)]+)/([^/\s)]+)/lessons/([^/\s)]+)/([^/\s)#?]+))"
    )
    url_pattern = r"https?://[^\s)]+"

    links: list[dict[str, object]] = []
    seen: set[tuple[str, str, str, str]] = set()

    for found in re.finditer(post_pattern, text):
        raw_owner, raw_repo, raw_slug = found.group(2, 3, 4)
        owner = unquote(raw_owner)
        repo = unquote(raw_repo)
        slug = unquote(raw_slug.rstrip(".,"))
        post_link: dict[str, object] = {
            "kind": "post",
            "path": f"/posts/{owner}/{repo}/{slug}",
            "owner": owner,
            "repo": repo,
            "slug": slug,
            "content_path": f"blogs/{slug}",
        }
        _append_discussion_link(links, seen, post_link)

    for found in re.finditer(lesson_pattern, text):
        raw_owner, raw_repo, raw_chapter, raw_lesson = found.group(2, 3, 4, 5)
        owner = unquote(raw_owner)
        repo = unquote(raw_repo)
        chapter = unquote(raw_chapter)
        lesson = unquote(raw_lesson.rstrip(".,"))
        lesson_link: dict[str, object] = {
            "kind": "lesson",
            "path": f"/courses/{owner}/{repo}/lessons/{chapter}/{lesson}",
            "owner": owner,
            "repo": repo,
            "chapter": chapter,
            "lesson": lesson,
            "content_path": f"lessons/{chapter}/{lesson}",
        }
        _append_discussion_link(links, seen, lesson_link)

    for raw_url in re.findall(url_pattern, text):
        file_link = _forgejo_file_link(raw_url)
        if file_link is None:
            continue
        _append_discussion_link(links, seen, file_link)

    return links


def _append_discussion_link(
    links: list[dict[str, object]],
    seen: set[tuple[str, str, str, str]],
    link: dict[str, object],
) -> None:
    """Append ``link`` to ``links`` unless an equivalent one was added before.

    Two links are equivalent when their ``kind``, ``owner``, ``repo`` and
    ``content_path`` match (missing fields count as empty strings). Both
    ``links`` and ``seen`` are updated in place.
    """
    identity_fields = ("kind", "owner", "repo", "content_path")
    key = tuple(str(link.get(field, "")) for field in identity_fields)
    if key not in seen:
        seen.add(key)
        links.append(link)


def _forgejo_file_link(raw_url: str) -> dict[str, object] | None:
    """Map a Forgejo source-view URL onto a site post or lesson link.

    The URL path must look like
    ``<owner>/<repo>/src/branch/<branch>/blogs/<slug>/...`` or
    ``<owner>/<repo>/src/branch/<branch>/lessons/<chapter>/<lesson>/...``.
    Trailing ``.``/``,`` on the URL are ignored and path segments are
    percent-decoded. Returns ``None`` for any other URL.
    """
    url_path = urlparse(raw_url.rstrip(".,")).path
    segments = [unquote(segment) for segment in url_path.strip("/").split("/") if segment]

    # owner, repo, "src", "branch", <branch>, then at least two content segments.
    if len(segments) < 7:
        return None
    owner, repo, view, ref_kind = segments[:4]
    if (view, ref_kind) != ("src", "branch"):
        return None

    content = segments[5:]
    section = content[0]

    if section == "blogs":
        slug = content[1]
        return {
            "kind": "post",
            "path": f"/posts/{owner}/{repo}/{slug}",
            "owner": owner,
            "repo": repo,
            "slug": slug,
            "content_path": f"blogs/{slug}",
        }

    if section == "lessons" and len(content) >= 3:
        chapter, lesson = content[1], content[2]
        return {
            "kind": "lesson",
            "path": f"/courses/{owner}/{repo}/lessons/{chapter}/{lesson}",
            "owner": owner,
            "repo": repo,
            "chapter": chapter,
            "lesson": lesson,
            "content_path": f"lessons/{chapter}/{lesson}",
        }

    return None


def _discussion_reply(comment: dict[str, Any]) -> dict[str, object]:
    """Convert a Forgejo issue comment into a reply payload.

    Missing or null ``id``, ``user`` and ``body`` fields fall back to ``0``,
    an anonymous author and a placeholder body respectively.
    """
    author = comment.get("user") or {}
    body = str(comment.get("body", "") or "").strip() or "No comment body provided."

    return {
        # `or 0` covers an explicit null id, which int() would reject.
        "id": int(comment.get("id", 0) or 0),
        "author": author.get("login", "Unknown author"),
        "avatar_url": author.get("avatar_url", ""),
        "body": body,
        "created_at": comment.get("created_at", ""),
        "html_url": comment.get("html_url", ""),
    }


def _display_auth_source(auth_source: str, session_user: dict[str, Any] | None) -> str:
    """Return the auth source to report: ``"session"`` whenever a session user exists."""
    return "session" if session_user else auth_source


def _empty_payload(
    *,
    source_cards: list[dict[str, str]],
    warnings: list[str],
    auth: dict[str, object],
    hero_summary: str,
) -> dict[str, object]:
    """Build the reduced payload used when no live content could be loaded.

    All content lists are empty and general discussion is reported as not
    configured. ``implementation_notes`` carries ``warnings`` when there are
    any, otherwise a single default note.
    """
    hero = {
        "eyebrow": "Forgejo connection status",
        "title": "Robot U is configured for aksal.cloud.",
        "summary": hero_summary,
        "highlights": [
            "Forgejo remains the source of truth for content and discussions",
            "The prototype now targets aksal.cloud by default",
            "Public repo discovery works without signing in when Forgejo allows anonymous reads",
        ],
    }
    if warnings:
        notes = warnings
    else:
        notes = ["Live repo discovery is ready, but Forgejo did not return public content."]

    return {
        "hero": hero,
        "auth": auth,
        "source_of_truth": source_cards,
        "discussion_settings": _discussion_settings_from_configured(False),
        "featured_courses": [],
        "recent_posts": [],
        "upcoming_events": [],
        "recent_discussions": [],
        "implementation_notes": notes,
    }


def _auth_payload(
    user: dict[str, Any] | None,
    source: str,
    settings: Settings,
) -> dict[str, object]:
    """Describe the caller's authentication state for the frontend.

    ``authenticated`` reflects whether a non-empty ``user`` was supplied.
    ``can_reply`` depends only on ``source`` being ``authorization`` or
    ``session``. ``oauth_configured`` is true when the auth secret key and
    both Forgejo OAuth client credentials are set.
    """
    oauth_configured = bool(
        settings.auth_secret_key
        and settings.forgejo_oauth_client_id
        and settings.forgejo_oauth_client_secret
    )
    is_authenticated = bool(user)
    login = user.get("login", "Unknown user") if user else None

    return {
        "authenticated": is_authenticated,
        "login": login,
        "source": source,
        "can_reply": source in {"authorization", "session"},
        "oauth_configured": oauth_configured,
    }


async def _summarize_blog_post(
    client: ForgejoClient,
    owner: str,
    repo: str,
    full_name: str,
    repo_description: str,
    updated_at: str,
    default_branch: str,
    repo_html_url: str,
    post_name: str,
) -> dict[str, object]:
    """Load one blog post from ``blogs/<post_name>/`` in a repository.

    The first markdown file in the folder (by name) is the post body; every
    other file is listed as an asset. Title and summary come from the file's
    front matter, falling back to a name derived from the markdown file, then
    from the folder, and to the repository description respectively.

    When the folder cannot be listed, holds no markdown, or the file cannot be
    read, a post with an empty body is returned carrying whatever was learned
    up to that point.
    """
    post_path = f"blogs/{post_name}"
    fallback_title = _display_name(post_name)
    raw_base_url = _raw_folder_url(repo_html_url, default_branch, post_path)

    def placeholder(**extras: Any) -> dict[str, object]:
        # Body-less post sharing the identifying fields known so far.
        return _empty_blog_post(
            owner,
            repo,
            full_name,
            post_name,
            fallback_title,
            repo_description,
            updated_at,
            post_path,
            raw_base_url=raw_base_url,
            **extras,
        )

    try:
        post_entries = await client.list_directory(owner, repo, post_path)
    except ForgejoClientError:
        return placeholder()

    assets = _content_assets(post_entries, raw_base_url, post_path)
    markdown_files = _markdown_file_entries(post_entries)
    if not markdown_files:
        return placeholder(assets=assets)

    primary = markdown_files[0]
    markdown_name = str(primary["name"])
    markdown_path = f"{post_path}/{markdown_name}"

    try:
        file_payload = await client.get_file_content(owner, repo, markdown_path)
    except ForgejoClientError:
        return placeholder(
            file_path=markdown_path,
            html_url=str(primary.get("html_url", "")),
            assets=assets,
        )

    metadata, body = _parse_frontmatter(str(file_payload.get("content", "")))
    title = metadata.get("title") or _display_name(markdown_name) or fallback_title
    summary = metadata.get("summary") or repo_description or ""
    return {
        "slug": post_name,
        "title": str(title),
        "owner": owner,
        "name": repo,
        "repo": full_name,
        "summary": str(summary),
        "path": post_path,
        "file_path": str(file_payload.get("path", markdown_path)),
        "html_url": str(file_payload.get("html_url", "")),
        "raw_base_url": raw_base_url,
        "assets": assets,
        "body": body,
        "updated_at": updated_at,
    }


async def _summarize_lesson(
    client: ForgejoClient,
    owner: str,
    repo: str,
    default_branch: str,
    repo_html_url: str,
    chapter_name: str,
    lesson_name: str,
) -> dict[str, object]:
    """Load one lesson from ``lessons/<chapter_name>/<lesson_name>/``.

    The first markdown file in the folder (by name) is the lesson body; every
    other file is listed as an asset. Title and summary come from the file's
    front matter, with the title falling back to a name derived from the
    markdown file and then from the folder.

    When the folder cannot be listed, holds no markdown, or the file cannot be
    read, a lesson with an empty body is returned carrying whatever was
    learned up to that point.
    """
    lesson_path = f"lessons/{chapter_name}/{lesson_name}"
    fallback_title = _display_name(lesson_name)
    raw_base_url = _raw_folder_url(repo_html_url, default_branch, lesson_path)

    def placeholder(**extras: Any) -> dict[str, object]:
        # Body-less lesson sharing the identifying fields known so far.
        return _empty_lesson(
            lesson_name,
            fallback_title,
            lesson_path,
            raw_base_url=raw_base_url,
            **extras,
        )

    try:
        lesson_entries = await client.list_directory(owner, repo, lesson_path)
    except ForgejoClientError:
        return placeholder()

    assets = _content_assets(lesson_entries, raw_base_url, lesson_path)
    markdown_files = _markdown_file_entries(lesson_entries)
    if not markdown_files:
        return placeholder(assets=assets)

    primary = markdown_files[0]
    markdown_name = str(primary["name"])
    markdown_path = f"{lesson_path}/{markdown_name}"

    try:
        file_payload = await client.get_file_content(owner, repo, markdown_path)
    except ForgejoClientError:
        return placeholder(
            file_path=markdown_path,
            html_url=str(primary.get("html_url", "")),
            assets=assets,
        )

    metadata, body = _parse_frontmatter(str(file_payload.get("content", "")))
    title = metadata.get("title") or _display_name(markdown_name) or fallback_title
    return {
        "slug": lesson_name,
        "title": str(title),
        "summary": str(metadata.get("summary") or ""),
        "path": lesson_path,
        "file_path": str(file_payload.get("path", markdown_path)),
        "html_url": str(file_payload.get("html_url", "")),
        "raw_base_url": raw_base_url,
        "assets": assets,
        "body": body,
    }


def _sorted_dir_entries(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the directory entries with a string name, ordered by name."""
    directories = [
        entry
        for entry in entries
        if isinstance(entry.get("name"), str) and entry.get("type") == "dir"
    ]
    directories.sort(key=lambda entry: str(entry["name"]))
    return directories


def _markdown_file_entries(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the file entries whose name ends in ``.md`` (any case), ordered by name."""

    def is_markdown_file(entry: dict[str, Any]) -> bool:
        name = entry.get("name")
        if entry.get("type") != "file" or not isinstance(name, str):
            return False
        return str(name).lower().endswith(".md")

    markdown_entries = list(filter(is_markdown_file, entries))
    markdown_entries.sort(key=lambda entry: str(entry["name"]))
    return markdown_entries


def _content_assets(
    entries: list[dict[str, Any]],
    raw_base_url: str,
    folder_path: str,
) -> list[dict[str, object]]:
    """List the non-markdown files of a content folder as asset records.

    Each record carries ``name``, ``path``, ``html_url`` and ``download_url``.
    A missing ``path`` is derived from ``folder_path`` and a missing
    ``download_url`` from ``raw_base_url``. Records are ordered by name.
    """
    collected: list[dict[str, object]] = []
    for entry in entries:
        raw_name = entry.get("name")
        is_named_file = entry.get("type") == "file" and isinstance(raw_name, str)
        if not is_named_file:
            continue

        name = str(raw_name)
        if name.lower().endswith(".md"):
            # Markdown is the content itself, not an attachment.
            continue

        asset_path = entry.get("path") or f"{folder_path}/{name}"
        download_url = entry.get("download_url") or _raw_file_url(raw_base_url, name)
        collected.append(
            {
                "name": name,
                "path": str(asset_path),
                "html_url": str(entry.get("html_url", "")),
                "download_url": str(download_url),
            },
        )

    collected.sort(key=lambda asset: str(asset["name"]))
    return collected


def _raw_folder_url(repo_html_url: str, default_branch: str, folder_path: str) -> str:
    """Build the raw-content base URL (with trailing slash) for a repo folder.

    Returns an empty string when the repository URL is unknown. An empty
    branch name falls back to ``main``.
    """
    if not repo_html_url:
        return ""

    branch = default_branch.strip("/")
    if not branch:
        branch = "main"
    pieces = (repo_html_url.rstrip("/"), "raw", "branch", branch, folder_path.strip("/"))
    return "/".join(pieces) + "/"


def _raw_file_url(raw_base_url: str, name: str) -> str:
    """Join a raw folder URL and a file name; empty when the base URL is unknown."""
    return f"{raw_base_url.rstrip('/')}/{name}" if raw_base_url else ""


def _display_name(value: str) -> str:
    """Turn a file or folder name into a human-readable title.

    Drops everything after the last ``.``, treats ``_`` and ``-`` as spaces,
    collapses whitespace, removes leading digits (ordering prefixes such as
    ``01-``) and title-cases the rest. If nothing is left, ``value`` is
    returned unchanged.
    """
    trimmed = value.strip()
    head, dot, _ = trimmed.rpartition(".")
    stem = head if dot else trimmed

    words = stem.translate(str.maketrans("_-", "  ")).split()
    label = " ".join(words).lstrip("0123456789 ").strip()
    return label.title() if label else value


async def _load_calendar_feeds(settings: Settings, warnings: list[str]) -> list[CalendarFeed]:
    """Fetch every configured calendar feed concurrently.

    Returns the feeds that loaded successfully, in configuration order. Each
    failure adds one message to ``warnings``: the error's own text for a
    ``CalendarFeedError``, or a message naming the URL for any other exception.
    """
    urls = settings.calendar_feed_urls
    if not urls:
        return []

    timeout = settings.forgejo_request_timeout_seconds
    outcomes = await asyncio.gather(
        *(fetch_calendar_feed(url, timeout) for url in urls),
        return_exceptions=True,
    )

    loaded: list[CalendarFeed] = []
    for url, outcome in zip(urls, outcomes, strict=False):
        if isinstance(outcome, CalendarFeed):
            loaded.append(outcome)
        elif isinstance(outcome, CalendarFeedError):
            warnings.append(str(outcome))
        elif isinstance(outcome, Exception):
            warnings.append(f"Calendar feed failed for {url}: {outcome}")
    return loaded


def _format_event_datetime(value: Any) -> str:
    """Format an event start as e.g. ``Mar 5, 9:07 AM UTC``.

    The timezone name is appended only when ``value`` reports one. Values
    without a ``strftime`` method are returned as their plain string form
    rather than raising.
    """
    if not hasattr(value, "strftime"):
        return str(value)

    # The "%-d" / "%-I" no-padding flags are a platform extension and are not
    # available everywhere (notably Windows), so padding is stripped by hand.
    # Neither field can be all zeros, so lstrip never empties it.
    day = value.strftime("%d").lstrip("0")
    hour = value.strftime("%I").lstrip("0")
    label = f"{value.strftime('%b')} {day}, {hour}:{value.strftime('%M %p')}"

    timezone_name = value.strftime("%Z")
    return f"{label} {timezone_name}" if timezone_name else label


def _empty_lesson(
    lesson_name: str,
    title: str,
    lesson_path: str,
    *,
    file_path: str = "",
    html_url: str = "",
    raw_base_url: str = "",
    assets: list[dict[str, object]] | None = None,
) -> dict[str, object]:
    """Build a lesson record with no summary and no body.

    Used when the lesson content could not be loaded; the optional fields
    carry whatever was discovered before loading stopped.
    """
    lesson: dict[str, object] = dict(
        slug=lesson_name,
        title=title,
        summary="",
        path=lesson_path,
        file_path=file_path,
        html_url=html_url,
        raw_base_url=raw_base_url,
    )
    lesson["assets"] = assets if assets else []
    lesson["body"] = ""
    return lesson


def _empty_blog_post(
    owner: str,
    repo: str,
    full_name: str,
    post_name: str,
    title: str,
    summary: str,
    updated_at: str,
    post_path: str,
    *,
    file_path: str = "",
    html_url: str = "",
    raw_base_url: str = "",
    assets: list[dict[str, object]] | None = None,
) -> dict[str, object]:
    """Build a blog-post record with an empty body.

    Used when the post content could not be loaded; the optional fields carry
    whatever was discovered before loading stopped.
    """
    post: dict[str, object] = dict(
        slug=post_name,
        title=title,
        owner=owner,
        name=repo,
        repo=full_name,
        summary=summary,
        path=post_path,
        file_path=file_path,
        html_url=html_url,
        raw_base_url=raw_base_url,
    )
    post["assets"] = assets if assets else []
    post["body"] = ""
    post["updated_at"] = updated_at
    return post


def _parse_frontmatter(markdown: str) -> tuple[dict[str, str], str]:
    """Split simple ``key: value`` front matter from a markdown document.

    Front matter is a block opened by a ``---`` line at the very start of the
    document and closed by the next ``---`` line. Only flat ``key: value``
    lines are understood; values lose surrounding quotes, and lines without a
    colon or with an empty key or value are ignored.

    Line endings may be ``\\n`` or ``\\r\\n``, and a leading byte-order mark
    is tolerated when detecting the opening line.

    Returns:
        ``(metadata, body)``. When the document has no front matter, or the
        block is never closed, the metadata is empty and the body is the whole
        document stripped of surrounding whitespace.
    """
    # A UTF-8 BOM in front of the opening delimiter must not hide the front matter.
    lines = markdown.removeprefix("\ufeff").splitlines()
    if not lines or lines[0].rstrip() != "---":
        return {}, markdown.strip()

    metadata: dict[str, str] = {}
    for index, line in enumerate(lines[1:], start=1):
        if line.strip() == "---":
            body = "\n".join(lines[index + 1 :]).strip()
            return metadata, body

        key, separator, raw_value = line.partition(":")
        if not separator:
            continue
        key = key.strip()
        value = raw_value.strip().strip("\"'")
        if key and value:
            metadata[key] = value

    # No closing delimiter: treat the whole document as body.
    return {}, markdown.strip()