// Lightweight helpers for OneNote table HTML. No full HTML parser — OneNote's
// table markup from Graph is predictable enough that targeted regex works for
// read-as-structured and cell patching. Upgrade to a real parser (cheerio /
// linkedom) if edge cases show up.
//
// OneNote elements carry their addressable id on the `id` attribute (format
// like `td:{guid}{n}`), NOT `data-id`. `includeIDs=true` just guarantees these
// are present; PATCH commands target the full `id` string verbatim.
// Row / cell matchers. Capture group 1 is the element's `id` attribute value,
// group 2 is its inner HTML.
//  - `(?=\s)` pins the tag name, so `<tr` / `<td` cannot match a longer tag
//    name that merely starts with those letters.
//  - `(?<![\w-])id=` only accepts a standalone `id` attribute. A plain `\bid=`
//    also accepts `data-id=` because `-` forms a word boundary, which would
//    hand back the wrong value as the patch target.
//  - The lazy `[^>]*?` makes the first `id` attribute in the tag win.
const ROW_RE = /<tr(?=\s)[^>]*?(?<![\w-])id="([^"]+)"[^>]*>([\s\S]*?)<\/tr>/gi;
const CELL_RE = /<td(?=\s)[^>]*?(?<![\w-])id="([^"]+)"[^>]*>([\s\S]*?)<\/td>/gi;

/**
 * Extracts every `<table>` that carries an `id` attribute, together with its
 * id-bearing rows and cells.
 *
 * Elements without a double-quoted `id` attribute are skipped. Matching is
 * regex based and stops at the first closing tag, so a table nested inside a
 * cell will truncate its parent; this helper is not a general HTML parser.
 *
 * @param {string} pageHtml - Page HTML to scan (coerced to string).
 * @returns {Array<{id: string, rows: Array<{id: string, cells: Array<{id: string, html: string, text: string}>}>}>}
 *   One entry per table, in document order. `html` is the cell's raw inner
 *   HTML and `text` is that HTML passed through `stripHtml`.
 */
export function extractTables(pageHtml) {
  const tableRe = /<table(?=\s)[^>]*?(?<![\w-])id="([^"]+)"[^>]*>([\s\S]*?)<\/table>/gi;
  const tables = [];
  // matchAll iterates over a private clone of the regex, so the shared
  // module-level patterns are never advanced by this function. The clone does
  // start from the original's lastIndex, hence the defensive resets below.
  for (const [, tableId, tableInner] of String(pageHtml).matchAll(tableRe)) {
    const rows = [];
    ROW_RE.lastIndex = 0;
    for (const [, rowId, rowInner] of tableInner.matchAll(ROW_RE)) {
      CELL_RE.lastIndex = 0;
      const cells = Array.from(rowInner.matchAll(CELL_RE), ([, id, html]) => ({
        id,
        html,
        text: stripHtml(html),
      }));
      rows.push({ id: rowId, cells });
    }
    tables.push({ id: tableId, rows });
  }
  return tables;
}
// Replacement text for the named character references stripHtml understands
// (nbsp, amp, lt, gt, quot). nbsp becomes a plain space so it takes part in
// whitespace collapsing like any other blank.
const NAMED_ENTITY_TEXT = Object.freeze({ nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"' });

/**
 * Reduces an HTML fragment to plain text: removes tags, decodes character
 * references, collapses whitespace runs to a single space and trims the ends.
 *
 * Decoding covers hexadecimal and decimal numeric references plus the named
 * references nbsp, amp, lt, gt and quot. All references are decoded in one
 * pass, so text produced by decoding is never decoded a second time (an
 * escaped ampersand followed by "lt;" stays literal text instead of turning
 * into "<"). Tags are removed before decoding, so escaped angle brackets
 * survive as text.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Plain-text rendering of the fragment.
 */
export function stripHtml(html) {
  const decodeEntity = (entity, hex, dec, name) => {
    if (name !== undefined) return NAMED_ENTITY_TEXT[name];
    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number(dec);
    // String.fromCodePoint throws a RangeError above U+10FFFF; leave such a
    // malformed reference in the text untouched instead of crashing.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  };
  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|(nbsp|amp|lt|gt|quot));/g, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Wraps cell content in a strikethrough element unless the whole fragment
 * already starts with an opening `<s>` and ends with a closing `</s>`
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @param {string} html - Cell inner HTML.
 * @returns {string} The input unchanged when it is already wrapped, otherwise
 *   the input enclosed in `<s>…</s>`.
 */
export function strikeHtml(html) {
  const alreadyStruck = /^\s*<s>[\s\S]*<\/s>\s*$/i.test(html);
  return alreadyStruck ? html : `<s>${html}</s>`;
}
// Magnitude suffix multipliers, keyed by lowercase suffix letter.
const SUFFIX_MULTIPLIERS = { k: 1_000, m: 1_000_000, b: 1_000_000_000 };

/**
 * Parses a human-formatted number out of free text.
 *
 * Accepts "1 234,56 Ft", "1,234.56", "13 519", "1.234,56", "1,234,567",
 * "5k", "1.2M" etc.
 *
 * Rules:
 *  - Whichever of `,` / `.` appears last is the decimal separator and the
 *    other one is grouping. A lone "1,5" is therefore 1.5.
 *  - If that last separator occurs more than once ("1,234,567", "1.234.567")
 *    it cannot be a decimal mark, so every occurrence is treated as grouping.
 *  - A k / m / b suffix (any case) multiplies by 1e3 / 1e6 / 1e9, but only
 *    when the letter follows a digit (optionally separated by whitespace) and
 *    ends a word. Unit words such as "km" or labels such as "Item" do not
 *    scale the value.
 *  - The Unicode minus sign (U+2212) is accepted as a negative sign.
 *
 * @param {*} text - Value to parse; coerced with String().
 * @returns {number} The parsed value, or NaN if no finite number can be read.
 */
export function parseNumber(text) {
  // Normalise the typographic minus first; otherwise the strip below would
  // discard it and silently flip the sign.
  const raw = String(text).replace(/\u2212/g, '-');
  const suffixMatch = raw.match(/\d\s*([kKmMbB])\b/);
  const multiplier = suffixMatch ? SUFFIX_MULTIPLIERS[suffixMatch[1].toLowerCase()] : 1;
  // Keep only digits, separators and the sign, then drop all spacing.
  const stripped = raw.replace(/[^\d,.\- ]/g, '').replace(/\s+/g, '');
  if (!/\d/.test(stripped)) return NaN;
  const decimalSep = stripped.lastIndexOf(',') > stripped.lastIndexOf('.') ? ',' : '.';
  const groupingSep = decimalSep === ',' ? '.' : ',';
  const pieces = stripped.split(groupingSep).join('').split(decimalSep);
  // More than two pieces means the "decimal" separator repeats, i.e. it is
  // really a grouping separator and the number has no fractional part.
  const normalized = pieces.length > 2 ? pieces.join('') : pieces.join('.');
  const n = Number(normalized);
  return Number.isFinite(n) ? n * multiplier : NaN;
}
/**
 * Builds a one-element patch command list whose single command has the
 * `replace` action.
 *
 * @param {string} targetId - Value placed in the command's `target` field.
 * @param {string} content - Value placed in the command's `content` field.
 * @returns {Array<{target: string, action: 'replace', content: string}>}
 */
export function buildPatchReplace(targetId, content) {
  const command = { target: targetId, action: 'replace', content };
  return [command];
}
/**
 * Builds a one-element patch command list whose single command has the
 * `append` action.
 *
 * @param {string} targetId - Value placed in the command's `target` field.
 * @param {string} content - Value placed in the command's `content` field.
 * @returns {Array<{target: string, action: 'append', content: string}>}
 */
export function buildPatchAppend(targetId, content) {
  const command = { target: targetId, action: 'append', content };
  return [command];
}