import microMemoize from "micro-memoize";

/** Tag name of a table cell: `"TD"` for a data cell, `"TH"` for a header cell. */
type TableRowItemType = "TD" | "TH";

/** A single cell extracted from a table row. */
interface TableRowItem {
  /** Text content of the cell (empty string when the cell has none). */
  content: string;
  /** Tag name of the cell element. */
  type: TableRowItemType;
  /** Value of the cell element's `rowSpan` property. */
  rowSpan: number;
  /** Value of the cell element's `colSpan` property. */
  colSpan: number;
}

/** One parsed `<tr>`: its cells plus a head/body classification. */
export interface TableRow {
  /** Cells of the row, in document order. */
  items: TableRowItem[];
  /**
   * `"head"` when the last cell of the row is a `TH` (or the row has no
   * cells), `"body"` otherwise.
   */
  type: "head" | "body";
}

/** Structured contents of a parsed dataframe table. */
export interface TableContents {
  /**
   * Row count taken from the "m rows x n columns" summary paragraph when the
   * HTML contains a `<p>` element, otherwise the number of parsed body rows.
   */
  rowCount: number;
  /** Header rows followed by body rows. */
  rows: TableRow[];
}

/**
 * Converts a collection of `<tr>` elements into `TableRow` records.
 *
 * Every child of a row becomes one item carrying its text content, tag name
 * and row/column spans. The row's `type` follows its last cell: `"head"` when
 * that cell is a `TH`, `"body"` otherwise. A row without any cells is
 * reported as `"head"`.
 *
 * @param rowElements The `<tr>` elements to convert, in document order.
 * @returns One `TableRow` per input element, in the same order.
 */
const parseRowElements = (
  rowElements: HTMLCollectionOf<HTMLTableRowElement>,
): TableRow[] =>
  Array.from(rowElements, (rowElement): TableRow => {
    const cells = Array.from(rowElement.children) as HTMLTableCellElement[];
    const items = cells.map((cell) => ({
      content: cell.textContent || "",
      type: cell.tagName as TableRowItemType,
      rowSpan: cell.rowSpan,
      colSpan: cell.colSpan,
    }));

    // Only the final cell decides the classification; an empty row stays "head".
    const lastCell = cells[cells.length - 1];
    const endsWithDataCell =
      lastCell !== undefined && lastCell.tagName !== "TH";

    return { type: endsWithDataCell ? "body" : "head", items };
  });

/**
 * Extracts the row count from Jupyter's dataframe summary paragraph.
 *
 * Jupyter inserts a string of the form "m rows x n columns" in a paragraph
 * tag at the end of its HTML output; this returns the leading `m`.
 *
 * @param countElement Paragraph element holding the summary text.
 * @returns The leading integer of the text, or `NaN` when the text does not
 *   start with a decimal integer (including when it is empty or missing).
 */
const parseRowCount = (countElement: HTMLParagraphElement): number => {
  // Trim so that whitespace around the summary cannot hide the count.
  const text = (countElement.textContent ?? "").trim();
  // parseInt stops at the first non-digit, so the remainder of the summary is
  // ignored. The explicit radix keeps the value from being read as hex.
  return parseInt(text, 10);
};

/**
 * Parses the JSON-encoded HTML of a rendered dataframe into structured rows.
 *
 * The input is decoded with `JSON.parse` and parsed as an HTML document.
 * Only raw (unstyled) dataframe tables are handled: the document must contain
 * a `<thead>`, its first `<table>` must have exactly the class `dataframe`,
 * and any non-empty `<style>` tag must start with `.dataframe`. Anything else
 * yields `null` so the caller can fall back to raw HTML.
 *
 * @param unJsonParsedHtmlString JSON-encoded HTML (it has not been passed
 *   through `JSON.parse` yet).
 * @returns Header rows followed by body rows together with the row count, or
 *   `null` when the HTML is not a supported table.
 * @throws SyntaxError propagated from `JSON.parse` when the input is not
 *   valid JSON.
 */
const _parseTableContents = (
  unJsonParsedHtmlString: string,
): TableContents | null => {
  const parser = new DOMParser();
  const parserElement = parser.parseFromString(
    JSON.parse(unJsonParsedHtmlString),
    "text/html",
  );
  const thead = parserElement.getElementsByTagName("thead")[0];

  // If it's not a table, return nothing
  if (thead == null) {
    return null;
  }

  const styleTag = parserElement.getElementsByTagName("style")[0];

  const styleString = styleTag?.innerText.trim() ?? "";
  // Optional access: a missing <table> is treated like a non-dataframe table
  // instead of throwing on `.className`.
  const tableClass =
    parserElement.getElementsByTagName("table")[0]?.className;
  // HACK, raw dataframe html starts with a .dataframe class
  // while styled dataframes don't. We fallback to raw HTML
  // if using styled dataframes
  if (
    tableClass !== "dataframe" ||
    (styleString && !styleString.startsWith(".dataframe"))
  ) {
    return null;
  }

  const tbody = parserElement.getElementsByTagName("tbody")[0];
  const headerRowElements = thead.getElementsByTagName("tr");
  const bodyRowElements = tbody?.getElementsByTagName("tr");

  const headerRows = parseRowElements(headerRowElements);
  const bodyRows =
    bodyRowElements != null ? parseRowElements(bodyRowElements) : [];
  const allRows = headerRows.concat(bodyRows);

  let rowCount = bodyRows.length;
  // If there are enough rows to fit in the screen without ellipses, it won't print the row summary
  const rowCountElements = parserElement.getElementsByTagName("p");
  if (rowCountElements.length > 0) {
    const summaryRowCount = parseRowCount(rowCountElements[0]);
    // The first <p> is not guaranteed to be the row summary; keep the body
    // row count rather than reporting NaN rows when it cannot be parsed.
    if (!Number.isNaN(summaryRowCount)) {
      rowCount = summaryRowCount;
    }
  }

  return {
    rowCount,
    rows: allRows,
  };
};

// `_parseTableContents` can be a bit expensive, especially if there are
// many rendered tables on a page, so we memoize it at module level. This
// goes beyond what `useMemo` alone provides, because the outline, logic,
// and app views can all share the same memoized cache.
export const parseTableContents = microMemoize(_parseTableContents, {
  // We don't want this cache to end up using _too_ much memory,
  // so limit it to 50 tables' worth of content.
  maxSize: 50,
});

/**
 * Reports whether an HTML string contains at least one `<table>` element
 * once parsed as an HTML document.
 *
 * @param htmlString HTML markup to inspect.
 * @returns `true` when the parsed document has a `<table>`, `false` otherwise.
 */
const _isTableHtmlString = (htmlString: string): boolean => {
  const parsedDocument = new DOMParser().parseFromString(
    htmlString,
    "text/html",
  );
  return parsedDocument.getElementsByTagName("table").length > 0;
};
// Memoized at module level for the same reason as `parseTableContents`:
// parsing HTML is comparatively expensive and several views may ask about
// the same string. The cache is capped at 50 entries to bound memory use.
export const isTableHtmlString = microMemoize(_isTableHtmlString, {
  maxSize: 50,
});
