I'm learning object oriented programing in a data science context.
I want to understand what good practice is in terms of writing methods within a class that relate to one another.
When I run my code:
import pandas as pd
pd.options.mode.chained_assignment = None
class MyData:
def __init__(self, file_path):
self.file_path = file_path
def prepper_fun(self):
'''Reads in an excel sheet, gets rid of missing values and sets datatype to numerical'''
df = pd.read_excel(self.file_path)
df = df.dropna()
df = df.apply(pd.to_numeric)
self.df = df
return(df)
def quality_fun(self):
'''Checks if any value in any column is more than 10. If it is, the value is replaced with
a warning 'check the original data value'.'''
for col in self.df.columns:
for row in self.df.index:
if self.df[col][row] > 10:
self.df[col][row] = str('check original data value')
return(self.df)
data = MyData('https://archive.ics.uci.edu/ml/machine-learning-databases/00429/Cryotherapy.xlsx')
print(data.prepper_fun())
print(data.quality_fun())
I get the following output (only part of the output is shown due to space constrains):
sex age Time
0 1 35 12.00
1 1 29 7.00
2 1 50 8.00
3 1 32 11.75
4 1 67 9.25
.. ... ... ...
sex age Time
0 1 check original data value check original data value
1 1 check original data value 7
2 1 check original data value 8
3 1 check original data value check original data value
4 1 check original data value 9.25
.. ... ... ...
I am happy with the output generated by each method.
But if I try to call print(data.quality_fun()) without first calling print(data.prepper_fun()), I get an error AttributeError: 'MyData' object has no attribute 'df'.
Being new to objected oriented programming, I am wondering if it is considered good practice to structure things like this, or if there is some other way of doing it.
Thanks for any help!
class feed(object):
# other functions
def new_method(self):
#This can now reference other object attributes
class feed(object):
def __init__ (self, food = 0, water = 0) :
self.food = food
self.water = water
def fill(self):
self.food += 1
self.water += 1
Make sure you have the df before you use it.
class MyData:
def __init__(self, file_path):
self.file_path = file_path
self.df = None
def quality_fun():
if self.df is None:
self.prepper_fun()
# rest of the code
If the csv file isn't changed during runtime you should call prepper_fun(self) in the __init__. , calling it separately leads to a high chance of bugs.
If the csv file is changed then the other answer works perfectly well