import { getDepositionLineNumber, isNumber } from "./utils.ts";
import { CurrentPage, PageWithText } from "./types.ts";

/**
 * Splits the raw text of a deposition transcript into pages.
 *
 * The input is walked line by line. A line holding only a number is treated
 * as a page marker when it plausibly continues the page sequence; every other
 * line is appended to the text of the page currently being built, with its
 * leading transcript line number removed when that number continues the
 * page's line sequence.
 */
export class DepositionParser {
  /**
   * A standalone number is only considered a page marker once the last
   * recognised line number on the current page has reached this value.
   *
   * TODO: some depositions have 24 lines per page, some have 25. On a
   * 25-line page, a standalone number that follows line 24 can therefore be
   * mistaken for a page marker.
   */
  private static readonly MIN_LINES_BEFORE_PAGE_BREAK = 24;

  /**
   * A standalone number with this value is not accepted as a page marker
   * while the current page number is still unknown.
   * NOTE(review): presumably because it is more likely line 25 of a 25-line
   * page than page 25 — confirm against real transcripts.
   */
  private static readonly AMBIGUOUS_STANDALONE_NUMBER = 25;

  private readonly text: string;
  private pages: PageWithText[];
  private currentPage?: CurrentPage;

  /**
   * @param text Full transcript text, lines separated by `\n` or `\r\n`.
   */
  constructor(text: string) {
    this.text = text;
    this.pages = [];
    this.currentPage = undefined;
  }

  /**
   * Parses the text given to the constructor.
   *
   * Parser state is reset on entry, so repeated calls each return a freshly
   * built array instead of appending to the result of an earlier call.
   *
   * @returns The pages found, in document order. A trailing page whose number
   * was never seen is numbered `pages.length + 1`.
   */
  parse = (): PageWithText[] => {
    this.resetState();

    // Splitting on /\r?\n/ keeps stray carriage returns out of the page text.
    const lines = this.text
      .split(/\r?\n/)
      .filter((line) => line.trim() !== "");

    for (const line of lines) {
      if (this.isNewPage(line)) {
        this.handleNewPage(line);
      } else {
        this.handleNewLine(line);
      }
    }

    // Flush the page that was still being built when the input ended.
    if (this.currentPage) {
      this.pages.push({
        pageNumber: this.currentPage.pageNumber ?? this.pages.length + 1,
        text: this.currentPage.text,
      });
    }
    return this.pages;
  };

  /**
   * Decides whether a line is a page marker.
   *
   * @param line A non-blank line of the transcript.
   * @returns `true` when the trimmed line is a number, the current page (if
   * any) has no recognised line below the page-break threshold, and the
   * number either follows the current page number or — when no page number is
   * known yet — is not the ambiguous value 25.
   */
  isNewPage(line: string): boolean {
    const trimmedLine = line.trim();
    if (!isNumber(trimmedLine)) {
      return false;
    }

    // Mid-page standalone numbers cannot be page markers.
    const lastLineNumber = this.currentPage?.lastParsedLineNumber;
    if (
      lastLineNumber !== undefined &&
      lastLineNumber < DepositionParser.MIN_LINES_BEFORE_PAGE_BREAK
    ) {
      return false;
    }

    const possiblePageNumber = Number(trimmedLine);
    const currentPageNumber = this.currentPage?.pageNumber;
    if (currentPageNumber === undefined) {
      return (
        possiblePageNumber !== DepositionParser.AMBIGUOUS_STANDALONE_NUMBER
      );
    }
    return possiblePageNumber === currentPageNumber + 1;
  }

  /**
   * Closes the page being built (if any) and starts a new one.
   *
   * When the closed page never received a number, it is assumed to be the
   * page immediately before the new one.
   *
   * @param line The page-marker line; converted with `Number(line)`.
   */
  handleNewPage(line: string): void {
    const newPageNumber = Number(line);
    const finishedPage = this.currentPage;
    if (finishedPage) {
      this.pages.push({
        pageNumber: finishedPage.pageNumber ?? newPageNumber - 1,
        text: finishedPage.text,
      });
    }

    this.currentPage = {
      // `||` is deliberate: both NaN and 0 are stored as "unknown".
      pageNumber: newPageNumber || undefined,
      text: "",
      lastParsedLineNumber: undefined,
    };
  }

  /**
   * Appends a content line to the current page, creating an unnumbered page
   * first when none exists yet.
   *
   * If the line starts with the next expected transcript line number, only
   * the trimmed remainder is appended and the page's line counter advances;
   * otherwise the whole line is appended unchanged. Each appended fragment is
   * preceded by a single space.
   *
   * @param line A non-blank line that is not a page marker.
   */
  handleNewLine(line: string): void {
    if (!this.currentPage) {
      this.currentPage = {
        pageNumber: undefined,
        text: "",
        lastParsedLineNumber: undefined,
      };
    }
    const page = this.currentPage;

    const [possibleLineNumber, rest] = getDepositionLineNumber(line);
    if (
      possibleLineNumber !== null &&
      this.isNewDepositionLine(possibleLineNumber)
    ) {
      // Parentheses are required: `+` binds tighter than `??`, so without
      // them a missing remainder would be appended as the text "undefined".
      page.text += " " + (rest?.trim() ?? "");
      page.lastParsedLineNumber = possibleLineNumber;
    } else {
      page.text += " " + line;
    }
  }

  /**
   * Checks whether a candidate line number continues the current page's
   * line sequence.
   *
   * @param possibleLineNumber Candidate number, or `null` when the line had
   * none.
   * @returns `true` for `1` when no line has been recognised on the current
   * page yet, or for exactly one more than the last recognised line number.
   */
  isNewDepositionLine(possibleLineNumber: number | null): boolean {
    if (possibleLineNumber === null) {
      return false;
    }
    const lastLineNumber = this.currentPage?.lastParsedLineNumber;
    if (lastLineNumber === undefined) {
      return possibleLineNumber === 1;
    }
    return possibleLineNumber === lastLineNumber + 1;
  }

  /** Discards any pages and in-progress page left over from an earlier parse. */
  private resetState(): void {
    this.pages = [];
    this.currentPage = undefined;
  }
}
