leetcode - pandas

import pandas as pd

def createDataframe(student_data: list[list[int]]) -> pd.DataFrame:
    """Build a DataFrame from rows of ``[student_id, age]`` pairs.

    Args:
        student_data: One inner list per student, holding the student's id
            followed by the student's age.

    Returns:
        A DataFrame with the columns ``student_id`` and ``age`` and one row
        per inner list (zero rows when ``student_data`` is empty).
    """
    # The builtin generic ``list[...]`` is used in the annotation because
    # ``typing.List`` is not imported next to this snippet and would raise
    # NameError as soon as the ``def`` statement is evaluated.
    return pd.DataFrame(student_data, columns=["student_id", "age"])
if __name__ == '__main__':
    # Each inner list is one student: [student_id, age].
    student_data = [[1, 15], [2, 11], [3, 11], [4, 20]]

    # Keep the returned DataFrame in df, then show it.
    df = createDataframe(student_data)
    print(df)




import pandas as pd

def getDataframeSize(players: pd.DataFrame) -> list[int]:
    """Return the dimensions of ``players`` as ``[rows, columns]``.

    Args:
        players: The DataFrame to measure.

    Returns:
        A two-element list: the number of rows followed by the number of
        columns.
    """
    # ``shape`` is a tuple; the declared return type is a list, so convert.
    return list(players.shape)


# The demo lives at module level: indented under the function it would sit
# after ``return`` and never run.
if __name__ == '__main__':
    # ``pd.DataFrame`` on its own is the class, not a table, so build a small
    # sample instance to measure.
    df = pd.DataFrame({
        'player_id': [846, 749, 155],
        'name': ['Mason', 'Riley', 'Bob'],
        'age': [21, 30, 28],
    })
    rows, columns = getDataframeSize(df)
    print(f"This DataFrame contains {rows} rows and {columns} columns.")

Explanation: 
(players.shape)
Here, players is the dataframe 
In pandas, the shape attribute of a DataFrame returns a tuple of (number of rows, number of columns); list() converts that tuple into a list.


import pandas as pd

def selectFirstRows(employees: pd.DataFrame) -> pd.DataFrame:
    """Return the first three rows of ``employees``.

    If the frame has fewer than three rows, all of them are returned.
    """
    first_three = employees.head(n=3)
    return first_three

if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class; the function needs an instance
    # with rows, so build a small sample frame.
    df = pd.DataFrame({
        'employee_id': [3, 90, 9, 60],
        'name': ['Bob', 'Alice', 'Tatiana', 'Annabelle'],
        'salary': [4269, 2021, 6793, 749],
    })
    first_three_rows = selectFirstRows(df)
    # Print the returned frame; ``employees`` only exists inside the function.
    print(first_three_rows)


answer:
import pandas as pd
def selectData(students: pd.DataFrame) -> pd.DataFrame:
    """Select the name and age of the student whose ``student_id`` is 101.

    Args:
        students: DataFrame that has ``student_id``, ``name`` and ``age``
            columns.

    Returns:
        The ``name`` and ``age`` columns of the row(s) with ``student_id``
        101. When no such row exists, the input DataFrame is returned
        unchanged.
    """
    student_101 = students.query('student_id == 101')
    if student_101.empty:
        # Fall back to the frame that was passed in. This must be the
        # ``students`` parameter, not a global name from the calling script.
        return students
    return student_101[['name', 'age']]
if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class and has no rows to query, so build
    # a small sample frame that includes student 101.
    df = pd.DataFrame({
        'student_id': [101, 53, 128, 3],
        'name': ['Ulysses', 'William', 'Henry', 'Henry'],
        'age': [13, 10, 6, 11],
    })
    student = selectData(df)
    print("\nRows name AND age (using .query()):\n", student)

explanation :
To select rows with a query expression, we can use df.query()
Rows name AND age (using .query())
This is a descriptive string that explains what the following output represents. It indicates that the output will show the "name" and "age" columns of a DataFrame, and that the data was selected using the .query() method in pandas.



import pandas as pd

def createBonusColumn(employees: pd.DataFrame):
    """Add a ``bonus`` column holding twice each ``salary`` value.

    The column is written onto the DataFrame that was passed in, and that
    same DataFrame is returned.
    """
    doubled_salary = employees['salary'] * 2
    employees['bonus'] = doubled_salary
    return employees

if __name__ == '__main__':
    # Sample input: the function reads the ``salary`` column.
    data = {
        'name': ['Piper', 'Grace', 'Georgia'],
        'salary': [4548, 28150, 1103],
    }
    df = pd.DataFrame(data)
    call_df = createBonusColumn(df)
    print(call_df)

explanation :
We can create a new column in the DataFrame with
df[column_name] = values
Here:
df[new_column_name] = df[existing_column_name] * 2


import pandas as pd

def dropDuplicateEmails(customers: pd.DataFrame):
    """Return ``customers`` without rows that repeat an earlier ``email``.

    The input DataFrame is left untouched; a new DataFrame is returned.
    """
    return customers.drop_duplicates(subset='email')

if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class; build a sample instance that
    # contains a repeated email address.
    df = pd.DataFrame({
        'customer_id': [1, 2, 3],
        'name': ['Ella', 'David', 'Zachary'],
        'email': ['emily@example.com', 'michael@example.com', 'emily@example.com'],
    })
    call_df = dropDuplicateEmails(df)
    # ``non_duplicated_df`` is local to the function; print the returned frame.
    print(call_df)

explanation : 
To remove duplicates, use drop_duplicates()
non_duplicated_df=customers.drop_duplicates('email') --> removes only the rows whose value in the email column duplicates an earlier row (the first occurrence is kept)


import pandas as pd

def dropMissingData(students: pd.DataFrame):
    """Return the rows of ``students`` whose ``name`` value is not missing.

    Only the ``name`` column is checked; missing values in other columns do
    not cause a row to be dropped.
    """
    return students.dropna(subset=['name'])
if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class; build a sample instance with one
    # missing name.
    df = pd.DataFrame({
        'student_id': [32, 217, 779],
        'name': ['Piper', None, 'Georgia'],
        'age': [5, 19, 20],
    })
    call_df = dropMissingData(df)
    # ``new_df`` is local to the function; print the returned frame.
    print(call_df)


dropna() drops the rows with missing data from the DataFrame
subset=['col_name'] restricts the missing-value check to the specified column(s)


import pandas as pd

def modifySalaryColumn(employees: pd.DataFrame) -> pd.DataFrame:
    """Double every value in the ``salary`` column.

    The column is overwritten on the DataFrame that was passed in, and that
    same DataFrame is returned.
    """
    doubled = employees['salary'].apply(lambda amount: amount * 2)
    employees['salary'] = doubled
    return employees
if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class and has no ``salary`` column, so
    # build a small sample instance.
    df = pd.DataFrame({
        'name': ['Jack', 'Piper', 'Mia'],
        'salary': [19666, 74754, 62509],
    })
    call_df = modifySalaryColumn(df)
    print(call_df)

explanation:
employees['salary'] = employees['salary'].apply(lambda salary: salary * 2)
One way to modify the column values is apply() with a lambda function; for simple arithmetic like this, employees['salary'] * 2 gives the same result.


import pandas as pd

def renameColumns(students: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``students`` with its columns renamed.

    Renames ``id``, ``first``, ``last`` and ``age`` to ``student_id``,
    ``first_name``, ``last_name`` and ``age_in_years``. The input DataFrame
    keeps its original column names.
    """
    # Maps each old column name to its new name.
    new_names = {
        'id': 'student_id',
        'first': 'first_name',
        'last': 'last_name',
        'age': 'age_in_years',
    }
    return students.rename(columns=new_names)

if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class; build a sample instance that has
    # the columns to be renamed.
    df = pd.DataFrame({
        'id': [1, 2, 3],
        'first': ['Mason', 'Ava', 'Taylor'],
        'last': ['King', 'Wright', 'Hall'],
        'age': [6, 7, 16],
    })
    call_df = renameColumns(df)
    print(call_df)

explanation : rename(columns={old_name: new_name}) returns a DataFrame with the listed columns renamed
 


answer:
import pandas as pd
def changeDatatype(students: pd.DataFrame) -> pd.DataFrame:
    """Convert the ``grade`` column to integers.

    The column is overwritten on the DataFrame that was passed in, and that
    same DataFrame is returned.
    """
    grades_as_int = students['grade'].astype(int)
    students['grade'] = grades_as_int
    return students
if __name__ == '__main__':
    # ``pd.DataFrame`` alone is the class and has no ``grade`` column, so
    # build a small sample instance with float grades.
    df = pd.DataFrame({
        'student_id': [1, 2],
        'name': ['Ava', 'Kate'],
        'grade': [73.0, 87.0],
    })
    call_df = changeDatatype(df)
    # Show the result, as the other demos in these notes do.
    print(call_df)
explanation :

astype() is used for explicit conversion of the datatype