
from openpyxl import load_workbook
import pandas as pd
from datetime import datetime

INPUT_FILE = "invoice.xlsx"
OUTPUT_FILE = "Jan_Invoices_Extracted.xlsx"

wb = load_workbook(INPUT_FILE, data_only=True)
writer = pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl")

columns = [
    "To Party",
    "GSTIN",
    "Invoice Number",
    "Invoice Date",
    "Sub Total",
    "Total Amount"
]

pd.DataFrame(columns=columns).to_excel(
    writer, sheet_name="INFO", index=False
)

invoice_index = 1

for ws in wb.worksheets:

    # -------------------------
    # INVOICE DATE (STRING SAFE)
    # -------------------------
    raw_date = ws["H9"].value
    invoice_date = None

    if isinstance(raw_date, datetime):
        invoice_date = raw_date

    elif isinstance(raw_date, str):
        for fmt in ("%d-%m-%Y", "%d/%m/%Y", "%d-%m-%y"):
            try:
                invoice_date = datetime.strptime(raw_date.strip(), fmt)
                break
            except:
                pass

    if not invoice_date:
        continue

    # JANUARY FILTER
    if invoice_date.month != 1:
        continue

    # -------------------------
    # FIND SUB TOTAL & GROSS TOTAL (LABEL BASED)
    # -------------------------
    sub_total = None
    gross_total = None

    for row in ws.iter_rows(min_row=15, max_row=ws.max_row, max_col=12):
        for cell in row:
            if isinstance(cell.value, str):
                label = cell.value.strip().lower()

                if label == "sub total":
                    sub_total = ws.cell(cell.row, cell.column + 1).value

                elif label == "gross total":
                    gross_total = ws.cell(cell.row, cell.column + 1).value

        if sub_total is not None and gross_total is not None:
            break

    data = {
        "To Party": ws["B9"].value,
        "GSTIN": ws["B13"].value,
        "Invoice Number": ws["G9"].value,
        "Invoice Date": invoice_date.date(),
        "Sub Total": sub_total,
        "Total Amount": gross_total
    }

    df = pd.DataFrame([data])
    df.to_excel(
        writer,
        sheet_name=f"Invoice_{invoice_index}",
        index=False
    )

    invoice_index += 1

writer.close()

print(f"SUCCESS: {invoice_index - 1} January invoices extracted")