CCI-MOC · QuanMPhm · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py
@@ -12,6 +12,24 @@ class NERCTotalInvoice(invoice.Invoice):
         "University of Rhode Island",
     ]
 
+    export_columns_list = [  # columns selected by Invoice._filter_columns, in this order
+        invoice.INVOICE_DATE_FIELD,
+        invoice.PROJECT_FIELD,
+        invoice.PROJECT_ID_FIELD,
+        invoice.PI_FIELD,
+        invoice.INVOICE_EMAIL_FIELD,
+        invoice.INVOICE_ADDRESS_FIELD,
+        invoice.INSTITUTION_FIELD,
+        invoice.INSTITUTION_ID_FIELD,
+        invoice.SU_HOURS_FIELD,
+        invoice.SU_TYPE_FIELD,
+        invoice.RATE_FIELD,
+        invoice.COST_FIELD,
+        invoice.CREDIT_FIELD,
+        invoice.CREDIT_CODE_FIELD,
+        invoice.BALANCE_FIELD,
+    ]
+
     @property
     def output_path(self) -> str:
         return f"NERC-{self.invoice_month}-Total-Invoice.csv"

diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py
@@ -1,100 +1,39 @@
 from dataclasses import dataclass
 import logging
-import sys
 
 import pandas
 import pyarrow
 
-from process_report.invoices import invoice, discount_invoice
-from process_report import util
-
+from process_report.invoices import invoice
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
 
 @dataclass
-class BillableInvoice(discount_invoice.DiscountInvoice):
-    NEW_PI_CREDIT_CODE = "0002"
-    INITIAL_CREDIT_AMOUNT = 1000
-    EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
+class BillableInvoice(invoice.Invoice):
     PI_S3_FILEPATH = "PIs/PI.csv"
 
-    nonbillable_pis: list[str]
-    nonbillable_projects: list[str]
-    old_pi_filepath: str
-
-    @staticmethod
-    def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
-        try:
-            old_pi_df = pandas.read_csv(
-                old_pi_filepath,
-                dtype={
-                    invoice.PI_INITIAL_CREDITS: pandas.ArrowDtype(
-                        pyarrow.decimal128(21, 2)
-                    ),
-                    invoice.PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                    invoice.PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
-                },
-            )
-        except FileNotFoundError:
-            sys.exit("Applying credit 0002 failed. Old PI file does not exist")
-
-        return old_pi_df
-
-    @staticmethod
-    def _remove_nonbillables(
-        data: pandas.DataFrame,
-        nonbillable_pis: list[str],
-        nonbillable_projects: list[str],
-    ):
-        return data[
-            ~data[invoice.PI_FIELD].isin(nonbillable_pis)
-            & ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
-        ]
-
-    @staticmethod
-    def _validate_pi_names(data: pandas.DataFrame):
-        invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
-        for i, row in invalid_pi_projects.iterrows():
-            logger.warn(
-                f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
-            )
-        return data[~pandas.isna(data[invoice.PI_FIELD])]
+    export_columns_list = [  # columns selected by Invoice._filter_columns, in this order
+        invoice.INVOICE_DATE_FIELD,
+        invoice.PROJECT_FIELD,
+        invoice.PROJECT_ID_FIELD,
+        invoice.PI_FIELD,
+        invoice.INVOICE_EMAIL_FIELD,
+        invoice.INVOICE_ADDRESS_FIELD,
+        invoice.INSTITUTION_FIELD,
+        invoice.INSTITUTION_ID_FIELD,
+        invoice.SU_HOURS_FIELD,
+        invoice.SU_TYPE_FIELD,
+        invoice.RATE_FIELD,
+        invoice.COST_FIELD,
+        invoice.CREDIT_FIELD,
+        invoice.CREDIT_CODE_FIELD,
+        invoice.BALANCE_FIELD,
+    ]
 
-    @staticmethod
-    def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
-        """Returns time difference between current invoice month and PI's first invoice month
-        I.e 0 for new PIs
-        Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug"""
-        first_invoice_month = old_pi_df.loc[
-            old_pi_df[invoice.PI_PI_FIELD] == pi, invoice.PI_FIRST_MONTH
-        ]
-        if first_invoice_month.empty:
-            return 0
-
-        month_diff = util.get_month_diff(invoice_month, first_invoice_month.iat[0])
-        if month_diff < 0:
-            sys.exit(
-                f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
-            )
-        else:
-            return month_diff
-
-    def _prepare(self):
-        self.data = self._remove_nonbillables(
-            self.data, self.nonbillable_pis, self.nonbillable_projects
-        )
-        self.data = self._validate_pi_names(self.data)
-        self.data[invoice.CREDIT_FIELD] = None
-        self.data[invoice.CREDIT_CODE_FIELD] = None
-        self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
-        self.old_pi_df = self._load_old_pis(self.old_pi_filepath)
-
-    def _process(self):
-        self.data, self.updated_old_pi_df = self._apply_credits_new_pi(
-            self.data, self.old_pi_df
-        )
+    old_pi_filepath: str  # local file uploaded to PI_S3_FILEPATH by export_s3
+    updated_old_pi_df: pandas.DataFrame  # re-typed with astype() in _prepare_export
 
     def _prepare_export(self):
         self.updated_old_pi_df = self.updated_old_pi_df.astype(
@@ -114,88 +53,3 @@ def export(self):
     def export_s3(self, s3_bucket):
         super().export_s3(s3_bucket)
         s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH)
-
-    def _apply_credits_new_pi(
-        self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
-    ):
-        def get_initial_credit_amount(
-            old_pi_df, invoice_month, default_initial_credit_amount
-        ):
-            first_month_processed_pis = old_pi_df[
-                old_pi_df[invoice.PI_FIRST_MONTH] == invoice_month
-            ]
-            if first_month_processed_pis[
-                invoice.PI_INITIAL_CREDITS
-            ].empty or pandas.isna(
-                new_pi_credit_amount := first_month_processed_pis[
-                    invoice.PI_INITIAL_CREDITS
-                ].iat[0]
-            ):
-                new_pi_credit_amount = default_initial_credit_amount
-
-            return new_pi_credit_amount
-
-        new_pi_credit_amount = get_initial_credit_amount(
-            old_pi_df, self.invoice_month, self.INITIAL_CREDIT_AMOUNT
-        )
-        print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")
-
-        current_pi_set = set(data[invoice.PI_FIELD])
-        for pi in current_pi_set:
-            credit_eligible_projects = data[
-                (data[invoice.PI_FIELD] == pi)
-                & ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))
-            ]
-            pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
-            pi_old_pi_entry = old_pi_df.loc[
-                old_pi_df[invoice.PI_PI_FIELD] == pi
-            ].squeeze()
-
-            if pi_age > 1:
-                for i, row in credit_eligible_projects.iterrows():
-                    data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
-            else:
-                if pi_age == 0:
-                    if len(pi_old_pi_entry) == 0:
-                        pi_entry = [pi, self.invoice_month, new_pi_credit_amount, 0, 0]
-                        old_pi_df = pandas.concat(
-                            [
-                                pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
-                                old_pi_df,
-                            ],
-                            ignore_index=True,
-                        )
-                        pi_old_pi_entry = old_pi_df.loc[
-                            old_pi_df[invoice.PI_PI_FIELD] == pi
-                        ].squeeze()
-
-                    remaining_credit = new_pi_credit_amount
-                    credit_used_field = invoice.PI_1ST_USED
-                elif pi_age == 1:
-                    remaining_credit = (
-                        pi_old_pi_entry[invoice.PI_INITIAL_CREDITS]
-                        - pi_old_pi_entry[invoice.PI_1ST_USED]
-                    )
-                    credit_used_field = invoice.PI_2ND_USED
-
-                credits_used = self.apply_flat_discount(
-                    data,
-                    credit_eligible_projects,
-                    remaining_credit,
-                    invoice.CREDIT_FIELD,
-                    invoice.BALANCE_FIELD,
-                    invoice.CREDIT_CODE_FIELD,
-                    self.NEW_PI_CREDIT_CODE,
-                )
-
-                if (pi_old_pi_entry[credit_used_field] != 0) and (
-                    credits_used != pi_old_pi_entry[credit_used_field]
-                ):
-                    print(
-                        f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
-                    )
-                old_pi_df.loc[
-                    old_pi_df[invoice.PI_PI_FIELD] == pi, credit_used_field
-                ] = credits_used
-
-        return (data, old_pi_df)
diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py
@@ -1,50 +1,41 @@
 from dataclasses import dataclass
-from decimal import Decimal
 
-import process_report.invoices.invoice as invoice
-import process_report.invoices.discount_invoice as discount_invoice
+from process_report.invoices import invoice
 
 
 @dataclass
-class BUInternalInvoice(discount_invoice.DiscountInvoice):
-    subsidy_amount: int
-
-    def _prepare(self):
-        def get_project(row):
-            project_alloc = row[invoice.PROJECT_FIELD]
-            if project_alloc.rfind("-") == -1:
-                return project_alloc
-            else:
-                return project_alloc[: project_alloc.rfind("-")]
+class BUInternalInvoice(invoice.Invoice):
+    export_columns_list = [  # columns selected by Invoice._filter_columns, in this order
+        invoice.INVOICE_DATE_FIELD,
+        invoice.PI_FIELD,
+        "Project",
+        invoice.COST_FIELD,
+        invoice.CREDIT_FIELD,
+        invoice.BU_BALANCE_FIELD,
+        invoice.PI_BALANCE_FIELD,
+    ]
+    exported_columns_map = {  # internal column name -> header used in the exported file
+        invoice.BU_BALANCE_FIELD: "Subsidy",
+        invoice.PI_BALANCE_FIELD: "Balance",
+    }
 
-        self.data = self.data[
-            self.data[invoice.INSTITUTION_FIELD] == "Boston University"
-        ].copy()
-        self.data["Project"] = self.data.apply(get_project, axis=1)
-        self.data[invoice.SUBSIDY_FIELD] = Decimal(0)
-        self.data = self.data[
-            [
-                invoice.INVOICE_DATE_FIELD,
-                invoice.PI_FIELD,
-                "Project",
-                invoice.COST_FIELD,
-                invoice.CREDIT_FIELD,
-                invoice.SUBSIDY_FIELD,
-                invoice.BALANCE_FIELD,
-            ]
-        ]
+    subsidy_amount: int
 
-    def _process(self):
-        data_summed_projects = self._sum_project_allocations(self.data)
-        self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)
+    def _prepare_export(self):
+        self.data = self._sum_project_allocations(self.data)  # one row per project
 
     def _sum_project_allocations(self, dataframe):
         """A project may have multiple allocations, and therefore multiple rows
         in the raw invoices. For BU-Internal invoice, we only want 1 row for
         each unique project, summing up its allocations' costs"""
         project_list = dataframe["Project"].unique()
         data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
-        sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
+        sum_fields = [
+            invoice.COST_FIELD,
+            invoice.CREDIT_FIELD,
+            invoice.BU_BALANCE_FIELD,
+            invoice.PI_BALANCE_FIELD,
+        ]
         for project in project_list:
             project_mask = dataframe["Project"] == project
             no_dup_project_mask = data_no_dup["Project"] == project
@@ -53,18 +44,3 @@ def _sum_project_allocations(self, dataframe):
             data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums
 
         return data_no_dup
-
-    def _apply_subsidy(self, dataframe, subsidy_amount):
-        pi_list = dataframe[invoice.PI_FIELD].unique()
-
-        for pi in pi_list:
-            pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
-            self.apply_flat_discount(
-                dataframe,
-                pi_projects,
-                subsidy_amount,
-                invoice.SUBSIDY_FIELD,
-                invoice.BALANCE_FIELD,
-            )
-
-        return dataframe
diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py
@@ -23,16 +23,25 @@
 INSTITUTION_ID_FIELD = "Institution - Specific Code"
 SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)"
 SU_TYPE_FIELD = "SU Type"
+RATE_FIELD = "Rate"
 COST_FIELD = "Cost"
 CREDIT_FIELD = "Credit"
 CREDIT_CODE_FIELD = "Credit Code"
 SUBSIDY_FIELD = "Subsidy"
 BALANCE_FIELD = "Balance"
 ###
 
+### Invoice additional fields (internal names; BUInternalInvoice renames them via exported_columns_map on export)
+PI_BALANCE_FIELD = "PI Balance"
+BU_BALANCE_FIELD = "BU Balance"
+###
+
 
 @dataclass
 class Invoice:
+    export_columns_list = []  # columns kept by _filter_columns; subclasses override
+    exported_columns_map = {}  # column renames applied by _filter_columns
+
     name: str
     invoice_month: str
     data: pandas.DataFrame
@@ -78,7 +87,14 @@ def _prepare_export(self):
         that should or should not be exported after processing."""
         pass
 
+    def _filter_columns(self):
+        """Keep only the `export_columns_list` columns of `self.data`, then
+        rename them according to `exported_columns_map`."""
+        selected = self.data[self.export_columns_list]
+        self.data = selected.rename(columns=self.exported_columns_map)
+
     def export(self):
+        self._filter_columns()  # select and rename export columns before writing
         self.data.to_csv(self.output_path, index=False)
 
     def export_s3(self, s3_bucket):