import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import rc


df = pd.read_csv('C:\\Users\\antho\\Dev\\tickets\\tickets\\tickets.csv', header=None)

df


df = pd.read_csv('C:\\Users\\antho\\Dev\\tickets\\tickets\\tickets.csv', header=None, names = ['ID', 'Vendor', 'Subtotal', 'Service_Fee', 'Additional_Taxes', 'Total'])

df


df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                3 non-null      int64  
 1   Vendor            3 non-null      object 
 2   Subtotal          3 non-null      int64  
 3   Service_Fee       3 non-null      float64
 4   Additional_Taxes  3 non-null      float64
 5   Total             3 non-null      float64
dtypes: float64(3), int64(2), object(1)
memory usage: 272.0+ bytes


df.describe()


# Pieplot of total costs
names=tuple(list(df['Vendor']))
values=np.array(df['Total'])
plt.pie(values, labels=names, labeldistance=1.15)
plt.show()


# Barplot of total costs
names=tuple(list(df['Vendor']))
values=np.array(df['Total'])
y_pos=np.arange(len(names))
plt.bar(y_pos, values)
plt.xticks(y_pos, names)
plt.ylabel('Cost')
plt.xlabel('Vendor')
plt.title('Total Ticket Cost')
plt.show()


# Transpose df to crate arrays for bars.  Doing it this way becuase I hardly ever transpose arrays, so I just wanted to do it this way.
dft = df.T
dft = dft.drop(['ID','Vendor'])


# Stacked Barplot of costs
names= np.array(df['Vendor'])
Subtotal = np.array(dft.iloc[0])
Service_Fee = np.array(dft.iloc[1])
Additional_Taxes = np.array(dft.iloc[2])

bars = np.add(Subtotal, Service_Fee).tolist()

r = [0,1,2]

barWidth=1

plt.bar(r, Subtotal, color='#00ae7d', edgecolor='white', width=barWidth, label = 'Subtotal')
plt.bar(r, Service_Fee, bottom = Subtotal, color='#98ff6e', edgecolor='white', width = barWidth, label = 'Service Fee')
plt.bar(r, Additional_Taxes, bottom=bars, color='#ffe54a', edgecolor='white', width=barWidth, label = 'Additional Taxes')

plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")

# Create legend and title
plt.legend()
plt.ylabel('Cost by Category')
plt.xlabel('Vendor')
plt.title('Total Ticket Cost Breakdown')

plt.show()


# Stacked Barplot of costs
names= np.array(df['Vendor'])
Subtotal = np.array(dft.iloc[0])
Service_Fee = np.array(dft.iloc[1])
Additional_Taxes = np.array(dft.iloc[2])

bars = np.add(Additional_Taxes, Service_Fee).tolist()

r = [0,1,2]

barWidth=1

plt.bar(r, Service_Fee, color='#98ff6e', edgecolor='white', width = barWidth, label = 'Service Fee')
plt.bar(r, Additional_Taxes, bottom=Service_Fee, color='#ffe54a', edgecolor='white', width=barWidth, label = 'Additional Taxes')
plt.bar(r, Subtotal, bottom = bars, color='#00ae7d', edgecolor='white', width=barWidth, label = 'Subtotal')

plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")

# Create legend
plt.legend()
plt.ylabel('Cost by Category')
plt.xlabel('Vendor')

plt.show()


# Stacked Barplot of costs

r = [0,1,2]
Subtotal = np.array(dft.iloc[0])
Service_Fee = np.array(dft.iloc[1])
Additional_Taxes = np.array(dft.iloc[2])

totals = [i+j+k for i,j,k in zip(Subtotal, Service_Fee, Additional_Taxes)]
Subtotal_ = [i/j*100 for i,j in zip(Subtotal, totals)]
Service_Fee_ = [i/j*100 for i,j in zip(Service_Fee, totals)]
Additional_Taxes_ = [i/j*100 for i,j in zip(Additional_Taxes, totals)]

barWidth=0.85
names= np.array(df['Vendor'])

plt.bar(r, Subtotal_, color='#00ae7d', edgecolor='white', width=barWidth, label = 'Subtotal')
plt.bar(r, Service_Fee_, bottom = Subtotal_, color='#98ff6e', edgecolor='white', width = barWidth, label = 'Service Fee')
plt.bar(r, Additional_Taxes_, bottom=[i+j for i,j in zip(Subtotal_, Service_Fee_)], color='#ffe54a', edgecolor='white', width=barWidth, label = 'Additional Taxes')

plt.xticks(r, names, fontweight='bold')
plt.xlabel("group")

# Create legend
plt.legend()
plt.ylabel('Cost Percentage by Category')
plt.xlabel('Vendor')

plt.show()


# Quick look at the dataframe so we don't have to scroll all the way up
df


TM_fees = df.iloc[0,3]+df.iloc[0,4]
ET_fees = df.iloc[1,3]+df.iloc[1,4]
TW_fees = df.iloc[2,3]+df.iloc[2,4]


print("Raw numbers (not percentage):")
print("TM combined fees are ", TM_fees)
print("ET combined fees are ", ET_fees)
print("TW combined fees are ", TW_fees)

Raw numbers (not percentage):
TM combined fees are  12.23
ET combined fees are  14.860000000000001
TW combined fees are  16.6


from decimal import *
getcontext().prec = 4 #set precision to two decimal places
print("Raw numbers (not percentage):")
print("TM combined fees are ", TM_fees)
ET_fees = Decimal(df.iloc[1,3])+Decimal(df.iloc[1,4]) #redo the math using Decimal()
print("ET combined fees are ", ET_fees)
print("TW combined fees are ", TW_fees)

Raw numbers (not percentage):
TM combined fees are  12.23
ET combined fees are  14.86
TW combined fees are  16.6


TM_pct = Service_Fee_[0] + Additional_Taxes_[0]
ET_pct = Service_Fee_[1] + Additional_Taxes_[1]
TW_pct = Service_Fee_[2] + Additional_Taxes_[2]


print("Percentage of Totals which are Combined Fees:")
print("TM combined fees percentage = ", TM_pct)
print("ET combined fees percentage = ", ET_pct)
print("TW combined fees percentage = ", TW_pct)

Percentage of Totals which are Combined Fees:
TM combined fees percentage =  22.143762447944955
ET combined fees percentage =  29.217459693275657
TW combined fees percentage =  21.957671957671963

	0	1	2	3	4	5
0	1	TM	43	8.35	3.88	55.23
1	2	ET	36	13.38	1.48	50.86
2	3	TW	59	16.60	0.00	75.60

	ID	Subtotal	Service_Fee	Additional_Taxes	Total
count	3.0	3.000000	3.000000	3.000000	3.000000
mean	2.0	46.000000	12.776667	1.786667	60.563333
std	1.0	11.789826	4.157960	1.958094	13.204175
min	1.0	36.000000	8.350000	0.000000	50.860000
25%	1.5	39.500000	10.865000	0.740000	53.045000
50%	2.0	43.000000	13.380000	1.480000	55.230000
75%	2.5	51.000000	14.990000	2.680000	65.415000
max	3.0	59.000000	16.600000	3.880000	75.600000

Which Vendor is Charging Too Much?¶

I decided to look at a few things:¶

1. Loading data in MySQL¶

2. Looking at the data¶

3. Comparing total and add-on costs across vendors¶

4. Comparing base and add-on costs as a percentage of the total cost¶

Takeaways & Recommendation:¶