Python Knowledge Summary

Important Links

Common Functions

Basic Operations

Operation Code
Get all column names list(df), df.columns.tolist(), list(df.columns)
Get data type type(df) (e.g., <class 'pandas.core.frame.DataFrame'>)
Get column types df.dtypes
Uninstall package in Jupyter !pip uninstall package_name -y
Convert ipynb to py in Jupyter import nbconvert then !jupyter nbconvert --to script *.ipynb
Check version package_name.__version__
Change shape with numpy pre = pre[:, np.newaxis] (e.g., from (300,) to (300,1))
Append dataframes df = df1.append(df2.append(df3))
Compute quotient and remainder div, mod = divmod(df['num'], n)
Print usage print("Center is:", center) or print(f"Center is: {center}")

Pandas loc and iloc

# loc: index by labels/names
# iloc: index by integer positions

# Sample data:
      AA  BB  CC  DD  EE
row1   2   3  56  55   4
row2   5   7   4  34   5
row3   9   7   4   7  15
row4   5  72  43  34   5

# loc examples
data1 = data.loc['row2']          # row2 data
data1 = data.loc['row2', :]       # same as above
data2 = data.loc[:, 'BB']         # column BB
data3 = data.loc['row1', 'BB']    # value 3
data4 = data.loc['row2':'row3', 'AA':'DD']  # rows 2-3, columns AA-DD
data5 = data.loc[data.BB > 6]     # rows where BB > 6
data6 = data.loc[data.BB > 6, ['BB', 'CC', 'DD']]  # subset

# iloc examples
data1 = data.iloc[1]              # second row
data1 = data.iloc[1, :]           # same
data2 = data.iloc[:, 1]           # second column
data3 = data.iloc[1, 1]           # row 2, column 2
data4 = data.iloc[1:3, 2:4]       # rows 2-3, columns 3-4 (0-indexed)

Data Types in Python, NumPy, and Pandas

Python's str and NumPy's string/unicode are rperesented as object in Pandas. Strings in Pandas are of type object.

Data type mapping

datetime Conversion

import pandas as pd

# Convert object to datetime
df = pd.DataFrame({'date': ['2011-04-24 01:30:00.000']})
df['date'] = pd.to_datetime(df['date'])
# Result: 0   2011-04-24 01:30:00
# Name: date, dtype: datetime64[ns]

# Convert back to object
df['date'] = df['date'].astype('object')
# Result: same appearance but dtype: object

# Custom format
pd.to_datetime("20110424 01:30:00.000", format='%Y%m%d %H:%M:%S.%f')

Resampling with resample()

# Fill missing time at 1-minute intervals, fill NaN with forward fill
# ffill() uses previous value, interpolate() uses interpolation, bfill() uses next value
df = df.resample('1T').mean().ffill()

# Get 5-minute intervals
# asfreq() or other methods like first()
df5 = df.resample('5T').asfreq()
# Alternative
df5 = df.loc[::5, :]

set_index()

df.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
# keys: column label or list of labels to become index
# drop: if True, remove the column used as index (default True)
# append: if True, append to existing index (default False)
# inplace: if True, modify the original DataFrame (default False)
# verify_integrity: check for duplicates (default False)

reset_index()

# Convert index back to a column
df.reset_index(drop=False, inplace=False, level=None, col_level=0, col_fill='')
# drop: if True, do not insert the old index as a column (default False)
# inplace: if True, modify the original DataFrame (default False)
# level: if MultiIndex, remove only specified level(s)
# col_level, col_fill: for MultiIndex columns

import pandas as pd
import numpy as np

df = pd.DataFrame([('bird', 389.0), ('bird', 24.0), ('mammal', 80.5), ('mammal', np.nan)],
                  index=['falcon', 'parrot', 'lion', 'monkey'],
                  columns=['class', 'max_speed'])
# Original
print(df)
# Reset with index becoming a column
df1 = df.reset_index()
print(df1)
# Reset and drop index
df2 = df.reset_index(drop=True)
print(df2)

reset_index examples reset_index examples reset_index examples

Timing Execution

import time
start_time = time.time()
# your code here
end_time = time.time()
execution_time = end_time - start_time

rename()

# Rename columns and/or index
df.rename(columns={'old_col': 'new_col'}, index={'old_idx': 'new_idx'}, inplace=False)
# Alternatively, use axis parameter
# axis='index' for row labels, axis='columns' for column labels

import pandas as pd
df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})

# Rename columns without axis
df.rename(columns={'A': 'a', 'B': 'c'})
# Returns new DataFrame; original unchanged (inplace=False by default)

# Rename rows and columns simultaneously
df_re = df.rename(columns={'A': 'a', 'B': 'c'}, index={0: '0a', 1: '1a'})
print(df_re)

# Using axis
# Rename rows with axis='index'
df.rename({1: 2, 2: 4}, axis='index')
# Rename columns with axis='columns'
df.rename(str.lower, axis='columns')

Plotting Functions

Mean and Standard Deviation Shaded Plot

Plotting mean and standard deviation with seaborn

Shaded plot

plt.annnotate()

Used to annotate specific points on a plot, typically in a loop.

Reference: matplotlib.pyplot.annotate

import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0, 6)
y = x * x

# Simple annotation on each point
plt.plot(x, y, marker='o')
for xi, yi in zip(x, y):
    plt.annotate(f"({xi},{yi})", xy=(xi, yi), xytext=(-20, 10), textcoords='offset points')
plt.show()

# More advanced annotation with box and arrow
bbox = dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.5)
arrowprops = dict(arrowstyle='->', connectionstyle='arc3,rad=0')
ax.annotate(f'data = ({xi:.1f}, {yi:.1f})',
            (xi, yi), xytext=(-2*10, 10), textcoords='offset points',
            bbox=bbox, arrowprops=arrowprops)

# Combine with scatter to highlight specific points
x1, y1 = [], []
plt.plot(x, y, marker='o')
for xi, yi in zip(x, y):
    if xi > 3:
        plt.annotate(f"({xi},{yi})", xy=(xi, yi), xytext=(-20, 10), textcoords='offset points')
        x1.append(xi)
        y1.append(yi)
plt.scatter(x1, y1, marker='p', color='m')
plt.show()

annotate examples annotate examples annotate examples

Tags: python Pandas matplotlib seaborn Data Visualization

Posted on Wed, 01 Jul 2026 16:38:09 +0000 by himnbandit