Let's start by creating a sample DataFrame:
import pandas as pd
# Build the sample data set: one list of values per column, five employees.
employee_data = dict(
    Employee=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[24, 27, 22, 32, 29],
    Location=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    Compensation=[70000, 80000, 60000, 90000, 85000],
)
df = pd.DataFrame(data=employee_data)

# Show the freshly built frame under its heading.
print("Original DataFrame:", df, sep="\n")
1. Display DataFrame metadata
print("\n1. Display DataFrame metadata:")
# DataFrame.info() writes its report (index, columns, dtypes, memory usage)
# to stdout itself and returns None, so it must not be wrapped in print():
# doing so would emit a stray "None" line after the report.
df.info()
2. View top rows
# Show the heading, then the first three rows of the frame.
print("\n2. View top rows:", df.head(n=3), sep="\n")
3. View bottom rows
# Show the heading, then the last three rows of the frame.
print("\n3. View bottom rows:", df.tail(n=3), sep="\n")
4. Generate statistical summary
print("\n4. Generate statistical summary:")
# describe() returns the summary statistics as a new DataFrame.
summary_stats = df.describe()
print(summary_stats)
5. Select single column
print("\n5. Select single column:")
# Label-based selection of every row of the 'Age' column (a Series).
age_column = df.loc[:, 'Age']
print(age_column)
6. Select multiple columns
print("\n6. Select multiple columns:")
# Passing a list of labels yields a DataFrame with just those columns.
wanted_columns = ['Employee', 'Compensation']
print(df[wanted_columns])
7. Select specific row
print("\n7. Select specific row:")
# .loc selects by index label; label 1 comes back as a Series.
row_at_label_1 = df.loc[1]
print(row_at_label_1)
8. Conditional row filtering
print("\n8. Conditional row filtering:")
# Boolean mask: True for rows whose Age is strictly greater than 25.
is_over_25 = df['Age'] > 25
print(df[is_over_25])
9. Add new column
print("\n9. Add new column:")
# One value per existing row; assigning to a new label appends the column.
experience_years = [1, 5, 2, 8, 6]
df['ExperienceYears'] = experience_years
print(df)
10. Remove column
print("\n10. Remove column:")
# drop() returns a new frame without the column; rebind df to keep the result.
df = df.drop(columns='ExperienceYears')
print(df)
11. Delete rows
print("\n11. Delete rows:")
# Remove the row whose index label is 4 and rebind df to the result.
df = df.drop(index=4)
print(df)
12. Rename columns
print("\n12. Rename columns:")
# Mapping of old column label -> new column label.
column_renames = {'Employee': 'Full Name'}
df = df.rename(columns=column_renames)
print(df)
13. Set custom index
print("\n13. Set custom index:")
# Promote the 'Full Name' column to be the row index.
df = df.set_index(keys='Full Name')
print(df)
14. Reset index
print("\n14. Reset index:")
# Move the current index back into a regular column (drop=False keeps it)
# and restore the default integer index.
df = df.reset_index(drop=False)
print(df)
15. Check for missing values
print("\n15. Check for missing values:")
# Boolean frame of the same shape: True wherever a cell is missing.
missing_mask = df.isnull()
print(missing_mask)
16. Fill missing values
print("\n16. Fill missing values:")
# Blank out one Location (index label 2) so there is a gap to fill.
df.loc[2, 'Location'] = None
# Replace missing Location entries with a placeholder and store the column back.
filled_location = df['Location'].fillna(value='Undisclosed')
df['Location'] = filled_location
print(df)
17. Remove null values
print("\n17. Remove null values:")
# Re-introduce a missing Location at index label 2 ...
df.loc[2, 'Location'] = None
# ... then discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')
print(df)
18. Sort by values
print("\n18. Sort by values:")
# Highest Compensation first; sort_values returns a sorted copy, df is untouched.
by_compensation_desc = df.sort_values('Compensation', ascending=False)
print(by_compensation_desc)
19. Sort by index
print("\n19. Sort by index:")
# Order rows by their index labels (ascending); returns a sorted copy.
by_index = df.sort_index()
print(by_index)
20. Group data
print("\n20. Group data:")
# Average the numeric columns per Location. numeric_only=True is required:
# the frame also holds non-numeric (string) columns such as the name column,
# and pandas 2.0+ raises TypeError when asked to take the mean of those
# instead of silently dropping them.
grouped_data = df.groupby('Location').mean(numeric_only=True)
print(grouped_data)
21. Merge DataFrames
print("\n21. Merge DataFrames:")
# Second frame: an 'Interest' for two employees, keyed by 'Employee'.
df_a = pd.DataFrame(
    data={
        'Employee': ['Alice', 'Bob'],
        'Interest': ['Reading', 'Swimming'],
    }
)
# Left merge: keep every row of df, matching df['Full Name'] against
# df_a['Employee']; rows of df without a match get missing values in the
# columns that come from df_a.
combined = pd.merge(
    left=df,
    right=df_a,
    how='left',
    left_on='Full Name',
    right_on='Employee',
)
print(combined)
22. Join DataFrames
print("\n22. Join DataFrames:")
# Second frame: an 'Interest' for two employees, keyed by 'Full Name'.
df_b = pd.DataFrame(
    data={
        'Full Name': ['Alice', 'Bob'],
        'Interest': ['Reading', 'Swimming'],
    }
)
# join() matches df's 'Full Name' column against the *index* of the other
# frame, so 'Full Name' is moved into the index of df_b first.
interests_by_name = df_b.set_index('Full Name')
joined_data = df.join(interests_by_name, on='Full Name', how='left')
print(joined_data)
23. Create pivot table
print("\n23. Create pivot table:")
# One row per Location, holding the mean Compensation for that Location.
pivot_table = df.pivot_table(
    index='Location',
    values='Compensation',
    aggfunc='mean',
)
print(pivot_table)
24. Transpose DataFrame
print("\n24. Transpose DataFrame:")
# .T swaps rows and columns; df itself is left unchanged.
transposed = df.T
print(transposed)
25. Calculate column mean
print("\n25. Calculate column mean:")
# Arithmetic mean of the 'Age' column.
mean_age = df['Age'].mean()
print(mean_age)
26. Calculate column median
print("\n26. Calculate column median:")
# Median of the 'Age' column.
median_age = df['Age'].median()
print(median_age)
27. Calculate column standard deviation
print("\n27. Calculate column standard deviation:")
# Standard deviation of the 'Compensation' column (pandas default settings).
compensation_std = df['Compensation'].std()
print(compensation_std)
28. Apply custom function to column
def _add_ten_years(age):
    """Return *age* increased by ten."""
    return age + 10


print("\n28. Apply custom function to column:")
# apply() calls the function once per value of 'Age'; the results are stored
# in a new 'Decade Age' column.
df['Decade Age'] = df['Age'].apply(_add_ten_years)
print(df)
29. Get unique values
print("\n29. Get unique values:")
# Distinct values of the 'Location' column, in order of first appearance.
distinct_locations = df['Location'].unique()
print(distinct_locations)
30. Count value frequencies
print("\n30. Count value frequencies:")
# Number of rows per distinct 'Location' value.
location_counts = df['Location'].value_counts()
print(location_counts)