robot-u-site/frontend/src/MarkdownContent.tsx

312 lines
7.7 KiB
TypeScript

/** The two HTML list containers the parser can emit. */
type ListType = "ul" | "ol";

/** Mutable scratch state shared by every line handler during one parse. */
interface ParserState {
  /** HTML fragments of finished blocks, in document order. */
  output: string[];
  /** Trimmed source lines of the paragraph currently being collected. */
  paragraphLines: string[];
  /** Source lines (marker removed) of the blockquote currently being collected. */
  blockquoteLines: string[];
  /** Inline-rendered HTML for each item of the list currently being collected. */
  listItems: string[];
  /** Kind of the list being collected, or `null` when no list is open. */
  listType: ListType | null;
  /** `true` while lines are being captured inside a fenced code block. */
  inCodeBlock: boolean;
  /** Text that followed the opening fence; empty when none was given. */
  codeLanguage: string;
  /** Raw, unescaped lines captured inside the open code fence. */
  codeLines: string[];
}
/**
 * Escapes the five characters that are significant in HTML text and in
 * double-quoted attribute values.
 *
 * @param value - Raw text.
 * @returns Text that is safe to place in element content or a quoted attribute.
 */
function escapeHtml(value: string): string {
  return (
    value
      // The ampersand must be handled first so the entities produced by the
      // following replacements are not escaped a second time.
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;")
  );
}

/**
 * Reverses {@link escapeHtml}: decodes exactly the five entities it produces
 * and leaves every other `&…;` sequence untouched.
 */
function unescapeHtml(value: string): string {
  const decoded: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
  };
  // Single pass, so "&amp;lt;" decodes to "&lt;" and not all the way to "<".
  return value.replace(/&(amp|lt|gt|quot|#39);/g, (entity, name: string) => decoded[name] ?? entity);
}

/**
 * Validates a raw (unescaped) link or image target and returns it HTML-escaped.
 *
 * Targets beginning with "/" are passed through as-is. Anything else must
 * parse as a URL (resolved against `baseUrl` when one is given) whose
 * protocol is `http:` or `https:`.
 *
 * @param value - Raw target text taken from the Markdown source.
 * @param baseUrl - Optional base used to resolve relative targets.
 * @returns The escaped target, or `null` when it is empty, unparsable, or
 *   uses any other protocol (for example `javascript:`).
 */
function normalizeLinkTarget(value: string, baseUrl?: string): string | null {
  const trimmed = value.trim();
  if (!trimmed) {
    return null;
  }
  if (trimmed.startsWith("/")) {
    return escapeHtml(trimmed);
  }
  try {
    const url = new URL(trimmed, baseUrl || undefined);
    if (url.protocol === "http:" || url.protocol === "https:") {
      return escapeHtml(url.toString());
    }
  } catch {
    // `new URL` throws on unparsable input (e.g. a relative path with no base).
    return null;
  }
  return null;
}

/**
 * Renders the inline Markdown subset (code spans, images, links, bold and
 * italic) of a single block of text to HTML.
 *
 * The whole input is HTML-escaped first, so every pattern below runs against
 * escaped text. Code spans and generated attribute values are swapped for
 * placeholder tokens so that later passes cannot rewrite them; the tokens are
 * expanded once at the end.
 *
 * @param markdown - Raw inline Markdown.
 * @param baseUrl - Optional base used to resolve relative link/image targets.
 * @returns An HTML fragment.
 */
function renderInline(markdown: string, baseUrl?: string): string {
  const fragments: string[] = [];
  const restore = (text: string): string =>
    text.replace(/__CODE_TOKEN_(\d+)__/g, (_match, index: string) => fragments[Number(index)] ?? "");
  const stash = (html: string): string => {
    const token = `__CODE_TOKEN_${fragments.length}__`;
    fragments.push(html);
    return token;
  };
  // The captured target comes from already-escaped text; decode it so that
  // normalizeLinkTarget sees the raw value and escapes it exactly once.
  const resolveTarget = (escapedHref: string): string | null =>
    normalizeLinkTarget(unescapeHtml(escapedHref), baseUrl);

  let rendered = escapeHtml(markdown);

  rendered = rendered.replace(/`([^`]+)`/g, (_match, code: string) => stash(`<code>${code}</code>`));

  // The optional title is matched as &quot;…&quot; because the double quotes
  // were escaped above; a literal '"' can no longer occur in `rendered`.
  rendered = rendered.replace(
    /!\[([^\]]*)\]\(([^)\s]+)(?:\s+&quot;.*?&quot;)?\)/g,
    (_match, label: string, href: string) => {
      const safeHref = resolveTarget(href);
      if (!safeHref) {
        return label;
      }
      // Attribute values are stashed so the emphasis passes below cannot
      // inject <strong>/<em> into them. Tokens already inside the value are
      // expanded now because the final expansion is a single pass.
      return `<img src="${stash(restore(safeHref))}" alt="${stash(restore(label))}" loading="lazy" />`;
    },
  );

  rendered = rendered.replace(
    /\[([^\]]+)\]\(([^)\s]+)(?:\s+&quot;.*?&quot;)?\)/g,
    (_match, label: string, href: string) => {
      const safeHref = resolveTarget(href);
      if (!safeHref) {
        return label;
      }
      // The label stays inline so bold/italic inside link text still render.
      return `<a href="${stash(restore(safeHref))}" target="_blank" rel="noreferrer">${label}</a>`;
    },
  );

  rendered = rendered.replace(/\*\*([^*]+)\*\*/g, "<strong>$1</strong>");
  rendered = rendered.replace(/\*([^*]+)\*/g, "<em>$1</em>");

  return restore(rendered);
}
/**
 * Builds a fresh parser state: no output, no pending paragraph, list or
 * blockquote, and no open code fence. Every call returns new arrays.
 */
function createParserState(): ParserState {
  const initial: ParserState = {
    inCodeBlock: false,
    codeLanguage: "",
    codeLines: [],
    listType: null,
    listItems: [],
    blockquoteLines: [],
    paragraphLines: [],
    output: [],
  };
  return initial;
}
/**
 * Emits the pending paragraph, if any, as a single `<p>` element and clears
 * the buffer. Collected lines are joined with one space before inline
 * rendering.
 */
function flushParagraph(state: ParserState, baseUrl?: string): void {
  const pending = state.paragraphLines;
  if (pending.length > 0) {
    const text = pending.join(" ");
    state.output.push(`<p>${renderInline(text, baseUrl)}</p>`);
    // Truncate in place so the state keeps the same array instance.
    pending.length = 0;
  }
}
/**
 * Emits the pending list, if any, as an `<ol>` or `<ul>` element and resets
 * the list state. Items are inserted verbatim; they were rendered to HTML
 * when they were collected.
 */
function flushList(state: ParserState): void {
  const tag = state.listType;
  if (!tag || state.listItems.length === 0) {
    return;
  }
  let items = "";
  for (const item of state.listItems) {
    items += `<li>${item}</li>`;
  }
  state.output.push(`<${tag}>${items}</${tag}>`);
  state.listItems.length = 0;
  state.listType = null;
}
/**
 * Emits the pending blockquote, if any, as `<blockquote><p>…</p></blockquote>`
 * and clears the buffer. Collected lines are joined with one space before
 * inline rendering.
 */
function flushBlockquote(state: ParserState, baseUrl?: string): void {
  const pending = state.blockquoteLines;
  if (pending.length > 0) {
    const inner = renderInline(pending.join(" "), baseUrl);
    state.output.push(`<blockquote><p>${inner}</p></blockquote>`);
    pending.length = 0;
  }
}
/**
 * Emits the open fenced code block, if any, as `<pre><code>…</code></pre>`
 * and resets the code-block state. The captured lines are joined with
 * newlines and HTML-escaped; a non-empty language becomes a
 * `language-<name>` class on the `<code>` element.
 */
function flushCodeBlock(state: ParserState): void {
  if (state.inCodeBlock) {
    let languageClass = "";
    if (state.codeLanguage) {
      languageClass = ` class="language-${escapeHtml(state.codeLanguage)}"`;
    }
    const body = escapeHtml(state.codeLines.join("\n"));
    state.output.push(`<pre><code${languageClass}>${body}</code></pre>`);
    state.inCodeBlock = false;
    state.codeLanguage = "";
    state.codeLines.length = 0;
  }
}
/**
 * Closes whichever non-code block is pending: paragraph, list and blockquote,
 * in that order. The open code fence, if any, is left untouched.
 */
function flushInlineBlocks(state: ParserState, baseUrl?: string): void {
  flushParagraph(state, baseUrl);
  flushList(state);
  flushBlockquote(state, baseUrl);
}
/**
 * Consumes a line while a fenced code block is open.
 *
 * A line whose trimmed text starts with three backticks closes the block;
 * any other line is captured verbatim.
 *
 * @returns `true` when a code block was open (the line is consumed),
 *   `false` otherwise.
 */
function handleCodeBlockLine(state: ParserState, line: string): boolean {
  if (state.inCodeBlock) {
    const closesFence = line.trim().startsWith("```");
    if (closesFence) {
      flushCodeBlock(state);
    } else {
      state.codeLines.push(line);
    }
    return true;
  }
  return false;
}
/**
 * Opens a fenced code block when the trimmed line starts with three
 * backticks. Any pending paragraph, list or blockquote is flushed first, and
 * the trimmed text after the backticks is stored as the code language.
 *
 * @returns `true` when the line opened a fence, `false` otherwise.
 */
function handleFenceStart(state: ParserState, line: string, baseUrl?: string): boolean {
  const stripped = line.trim();
  if (stripped.startsWith("```")) {
    flushInlineBlocks(state, baseUrl);
    state.inCodeBlock = true;
    state.codeLanguage = stripped.slice(3).trim();
    return true;
  }
  return false;
}
/**
 * Treats an empty or whitespace-only line as a block separator by flushing
 * the pending paragraph, list or blockquote.
 *
 * @returns `true` when the line was blank, `false` otherwise.
 */
function handleBlankLine(state: ParserState, line: string, baseUrl?: string): boolean {
  const isBlank = line.trim() === "";
  if (isBlank) {
    flushInlineBlocks(state, baseUrl);
  }
  return isBlank;
}
/**
 * Emits an `<h1>`–`<h6>` element for a line made of one to six `#`
 * characters, whitespace, and the heading text. Pending inline blocks are
 * flushed first.
 *
 * @returns `true` when the line was a heading, `false` otherwise.
 */
function handleHeadingLine(state: ParserState, line: string, baseUrl?: string): boolean {
  const parsed = /^(#{1,6})\s+(.*)$/.exec(line);
  if (parsed === null) {
    return false;
  }
  flushInlineBlocks(state, baseUrl);
  // The number of leading "#" characters selects the heading level.
  const tag = `h${parsed[1].length}`;
  const content = renderInline(parsed[2].trim(), baseUrl);
  state.output.push(`<${tag}>${content}</${tag}>`);
  return true;
}
/**
 * Emits `<hr />` for a line whose trimmed text is three or more hyphens, or
 * three or more asterisks. Pending inline blocks are flushed first.
 *
 * @returns `true` when the line was a rule, `false` otherwise.
 */
function handleRuleLine(state: ParserState, line: string, baseUrl?: string): boolean {
  const isRule = /^(-{3,}|\*{3,})$/.test(line.trim());
  if (isRule) {
    flushInlineBlocks(state, baseUrl);
    state.output.push("<hr />");
  }
  return isRule;
}
/**
 * Collects a list item of the requested kind.
 *
 * Unordered items start with `-`, `*` or `+`; ordered items start with
 * digits followed by a period. In both cases the marker must sit at the very
 * start of the line and be followed by whitespace. A pending paragraph or
 * blockquote is flushed, and a pending list of the other kind is closed
 * before the new item is added.
 *
 * @returns `true` when the line was an item of `listType`, `false` otherwise.
 */
function handleListLine(
  state: ParserState,
  line: string,
  listType: ListType,
  baseUrl?: string,
): boolean {
  const itemPatterns: Record<ListType, RegExp> = {
    ul: /^[-*+]\s+(.*)$/,
    ol: /^\d+\.\s+(.*)$/,
  };
  const found = itemPatterns[listType].exec(line);
  if (found === null) {
    return false;
  }

  flushParagraph(state, baseUrl);
  flushBlockquote(state, baseUrl);

  const switchingKind = state.listType !== listType;
  if (switchingKind) {
    flushList(state);
    state.listType = listType;
  }

  const itemHtml = renderInline(found[1].trim(), baseUrl);
  state.listItems.push(itemHtml);
  return true;
}
/**
 * Collects a blockquote line: `>` at the start of the line, optionally
 * followed by one whitespace character. A pending paragraph or list is
 * flushed first; the remaining text is stored trimmed.
 *
 * @returns `true` when the line was a blockquote line, `false` otherwise.
 */
function handleBlockquoteLine(state: ParserState, line: string, baseUrl?: string): boolean {
  const quoted = /^>\s?(.*)$/.exec(line);
  if (quoted === null) {
    return false;
  }
  flushParagraph(state, baseUrl);
  flushList(state);
  const text = quoted[1].trim();
  state.blockquoteLines.push(text);
  return true;
}
/**
 * Fallback handler: appends the trimmed line to the pending paragraph after
 * closing any pending list or blockquote.
 */
function handleParagraphLine(state: ParserState, line: string, baseUrl?: string): void {
  flushList(state);
  flushBlockquote(state, baseUrl);
  const text = line.trim();
  state.paragraphLines.push(text);
}
/**
 * Routes one source line to the first handler that accepts it.
 *
 * Handlers are tried in a fixed order: open code block, fence start, blank
 * line, heading, rule, unordered list, ordered list, blockquote. A line no
 * handler claims becomes paragraph text.
 */
function processMarkdownLine(state: ParserState, line: string, baseUrl?: string): void {
  // `||` short-circuits, so handlers after the first match are never invoked.
  const consumed =
    handleCodeBlockLine(state, line) ||
    handleFenceStart(state, line, baseUrl) ||
    handleBlankLine(state, line, baseUrl) ||
    handleHeadingLine(state, line, baseUrl) ||
    handleRuleLine(state, line, baseUrl) ||
    handleListLine(state, line, "ul", baseUrl) ||
    handleListLine(state, line, "ol", baseUrl) ||
    handleBlockquoteLine(state, line, baseUrl);

  if (!consumed) {
    handleParagraphLine(state, line, baseUrl);
  }
}
/**
 * Converts a Markdown document to an HTML string.
 *
 * CRLF line endings are normalised to LF, each line is fed through the line
 * handlers, and whatever block is still pending at the end of input
 * (including an unterminated code fence) is flushed.
 *
 * @param markdown - Markdown source.
 * @param baseUrl - Optional base used to resolve relative link/image targets.
 * @returns The concatenated HTML of all blocks.
 */
function markdownToHtml(markdown: string, baseUrl?: string): string {
  const state = createParserState();

  markdown
    .replace(/\r\n/g, "\n")
    .split("\n")
    .forEach((sourceLine) => {
      processMarkdownLine(state, sourceLine, baseUrl);
    });

  flushInlineBlocks(state, baseUrl);
  flushCodeBlock(state);

  return state.output.join("");
}
/**
 * Removes a leading level-1 heading when it repeats the given title.
 *
 * Leading whitespace is ignored when locating the first line. If that line,
 * trimmed, is exactly `# <title>`, the remainder of the document is returned
 * with its leading whitespace removed. In every other case the input is
 * returned unchanged.
 *
 * @param markdown - Markdown source.
 * @param title - Title to compare against the first heading.
 * @returns The Markdown without the duplicate heading, or the original input.
 */
export function stripLeadingTitleHeading(markdown: string, title: string): string {
  const body = markdown.trimStart();
  const newlineAt = body.indexOf("\n");
  const hasMoreLines = newlineAt !== -1;

  const heading = (hasMoreLines ? body.slice(0, newlineAt) : body).trim();
  if (heading !== `# ${title}`) {
    return markdown;
  }

  return hasMoreLines ? body.slice(newlineAt + 1).trimStart() : "";
}
/**
 * Renders Markdown as HTML inside a `<div class="markdown-content">`.
 *
 * The HTML produced by `markdownToHtml` is injected with
 * `dangerouslySetInnerHTML`, so the safety of the output rests entirely on
 * the escaping performed by that parser.
 *
 * @param props.markdown - Markdown source to render.
 * @param props.className - Optional extra class name(s) appended after `markdown-content`.
 * @param props.baseUrl - Optional base used to resolve relative link/image targets.
 */
export function MarkdownContent(props: { markdown: string; className?: string; baseUrl?: string }) {
  const { markdown, baseUrl, className: extraClassName } = props;
  const classNames = ["markdown-content"];
  if (extraClassName) {
    classNames.push(extraClassName);
  }
  const renderedHtml = markdownToHtml(markdown, baseUrl);
  return <div className={classNames.join(" ")} dangerouslySetInnerHTML={{ __html: renderedHtml }} />;
}