RSS Git Download  Clone
Raw Blame History 3kB 90 lines
// Lightweight helpers for OneNote table HTML. No full HTML parser — OneNote's
// table markup from Graph is predictable enough that targeted regex works for
// read-as-structured and cell patching. Upgrade to a real parser (cheerio /
// linkedom) if edge cases show up.
//
// OneNote elements carry their addressable id on the `id` attribute (format
// like `td:{guid}{n}`), NOT `data-id`. `includeIDs=true` just guarantees these
// are present; PATCH commands target the full `id` string verbatim.

// Row / cell matchers. `\sid="` (whitespace before `id`) is deliberate: a bare
// `\bid="` also matches the tail of `data-id="…"` because `-`→`i` is a word
// boundary, which would return the custom data-id instead of the element id.
// `\b` after the tag name keeps `<tr` from matching longer tag names.
const ROW_RE = /<tr\b[^>]*?\sid="([^"]+)"[^>]*>([\s\S]*?)<\/tr>/gi;
const CELL_RE = /<td\b[^>]*?\sid="([^"]+)"[^>]*>([\s\S]*?)<\/td>/gi;

/**
 * Extracts every id-bearing `<table>` from a page's HTML together with its
 * rows and cells.
 *
 * Matching is regex based: each element ends at the first matching closing
 * tag, so nested tables are not supported, and `<table>` / `<tr>` / `<td>`
 * elements without a double-quoted `id` attribute are skipped.
 *
 * @param {string} pageHtml - Page HTML to scan.
 * @returns {Array<{id: string, rows: Array<{id: string, cells: Array<{id: string, html: string, text: string}>}>}>}
 *   Tables in document order; `html` is the raw cell markup and `text` is the
 *   result of `stripHtml` on it.
 */
export function extractTables(pageHtml) {
    const tableRe = /<table\b[^>]*?\sid="([^"]+)"[^>]*>([\s\S]*?)<\/table>/gi;
    // matchAll iterates over a private copy of each regex, so the shared
    // module-level patterns never carry `lastIndex` state between calls.
    return Array.from(pageHtml.matchAll(tableRe), ([, tableId, tableInner]) => ({
        id: tableId,
        rows: Array.from(tableInner.matchAll(ROW_RE), ([, rowId, rowInner]) => ({
            id: rowId,
            cells: Array.from(rowInner.matchAll(CELL_RE), ([, cellId, cellHtml]) => ({
                id: cellId,
                html: cellHtml,
                text: stripHtml(cellHtml),
            })),
        })),
    }));
}

// Named character references understood by stripHtml.
const NAMED_ENTITIES = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

/**
 * Reduces an HTML fragment to plain text: removes tags, decodes numeric and a
 * small set of named character references, collapses whitespace runs to a
 * single space and trims the result.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Plain-text rendering of the fragment.
 */
export function stripHtml(html) {
    return html
        .replace(/<[^>]+>/g, '')
        // Decode every reference in ONE pass. Chained per-entity replaces
        // double-decode: `&amp;lt;` would become `&lt;` and then `<`.
        .replace(/&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|(nbsp|amp|lt|gt|quot|apos));/g, (entity, hex, dec, name) => {
            if (name !== undefined) return NAMED_ENTITIES[name];
            const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number(dec);
            // String.fromCodePoint throws RangeError above U+10FFFF; leave
            // such malformed references in the text untouched.
            return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        })
        .replace(/\s+/g, ' ')
        .trim();
}

/**
 * Wraps cell content in `<s>…</s>` unless the whole content is already
 * enclosed in a single `<s>` element.
 *
 * "Already struck" means the leading `<s>` (attributes allowed) is closed by
 * the final `</s>` of the content. Merely starting with `<s>` and ending with
 * `</s>` is not enough: `<s>a</s> b <s>c</s>` still has unstruck text and gets
 * wrapped.
 *
 * @param {string} html - Cell inner HTML.
 * @returns {string} `html` unchanged when fully struck, otherwise `<s>${html}</s>`.
 */
export function strikeHtml(html) {
    const trimmed = String(html).trim();
    // Every opening/closing <s> tag in order; group 1 is '/' for closers.
    const tags = [...trimmed.matchAll(/<(\/?)s(?:\s[^>]*)?>/gi)];
    const startsWithOpenTag = tags.length > 0 && tags[0].index === 0 && tags[0][1] === '';
    if (startsWithOpenTag) {
        let depth = 0;
        for (const tag of tags) {
            depth += tag[1] === '/' ? -1 : 1;
            if (depth === 0) {
                // This closer pairs with the leading <s>; the content is fully
                // struck only if nothing follows it.
                if (tag.index + tag[0].length === trimmed.length) return html;
                break;
            }
        }
    }
    return `<s>${html}</s>`;
}

// Magnitude suffixes recognised by parseNumber (matched case-insensitively).
const SUFFIX_MULTIPLIERS = { k: 1_000, m: 1_000_000, b: 1_000_000_000 };

/**
 * Parses a human-formatted number such as "1 234,56 Ft", "1,234.56", "13 519",
 * "1.234,56", "1,234,567", "5k" or "1.2M".
 *
 * Rules:
 * - Everything except digits, `,`, `.`, `-` and spaces is discarded; spaces
 *   are then removed.
 * - Of `,` and `.`, whichever occurs last is taken as the decimal separator
 *   and the other as a grouping separator. If the "decimal" separator occurs
 *   more than once it is really grouping ("1,234,567"), accepted only when
 *   every following group has exactly three digits.
 * - A standalone k / m / b letter (not the tail of a longer word such as "km"
 *   or "db") multiplies the value by 1e3 / 1e6 / 1e9.
 *
 * @param {*} text - Value to parse; coerced with `String()`.
 * @returns {number} The parsed number, or `NaN` if unparseable.
 */
export function parseNumber(text) {
    const raw = String(text);
    // The lookbehind rejects a suffix letter that merely ends a word
    // ("100 km", "13 519 db"); `\b` rejects one that starts a word.
    const suffixMatch = raw.match(/(?<!\p{L})([kKmMbB])\b/u);
    const multiplier = suffixMatch ? SUFFIX_MULTIPLIERS[suffixMatch[1].toLowerCase()] : 1;
    const stripped = raw.replace(/[^\d,.\- ]/g, '').replace(/\s+/g, '');
    if (!/\d/.test(stripped)) return NaN;

    const decimalSep = stripped.lastIndexOf(',') > stripped.lastIndexOf('.') ? ',' : '.';
    const groupSep = decimalSep === ',' ? '.' : ',';
    const pieces = stripped.split(groupSep).join('').split(decimalSep);

    let normalized;
    if (pieces.length > 2) {
        // Separator repeated: treat as thousands grouping, but only for
        // well-formed 3-digit groups so inputs like "1.2.3" stay NaN.
        const groups = pieces.slice(1);
        if (!groups.every((group) => /^\d{3}$/.test(group))) return NaN;
        normalized = pieces.join('');
    } else {
        normalized = pieces.join('.');
    }

    const n = Number(normalized);
    return Number.isFinite(n) ? n * multiplier : NaN;
}

/**
 * Builds a one-element command list whose single command has action
 * `'replace'`.
 *
 * @param {*} targetId - Stored as the command's `target`.
 * @param {*} content - Stored as the command's `content`.
 * @returns {Array<{target: *, action: string, content: *}>} Array holding the single command.
 */
export function buildPatchReplace(targetId, content) {
    const command = {
        target: targetId,
        action: 'replace',
        content,
    };
    return [command];
}

/**
 * Builds a one-element command list whose single command has action
 * `'append'`.
 *
 * @param {*} targetId - Stored as the command's `target`.
 * @param {*} content - Stored as the command's `content`.
 * @returns {Array<{target: *, action: string, content: *}>} Array holding the single command.
 */
export function buildPatchAppend(targetId, content) {
    const command = {
        target: targetId,
        action: 'append',
        content,
    };
    return [command];
}