🐍 Python 3 Data Analytics Cheatsheet


🛠️ 1. Setup & Environment

# Install packages
pip install pandas numpy matplotlib seaborn plotly scikit-learn jupyter

# Start Jupyter
jupyter notebook

📊 2. Data Handling with Pandas

import pandas as pd

# Read data
df = pd.read_csv('data.csv')       # Also: pd.read_excel, pd.read_json, pd.read_html, pd.read_sql
df.head(), df.tail()
df.shape, df.info(), df.describe()

# Selecting columns/rows
df['col'], df[['col1', 'col2']]
df.iloc[0], df.loc[0]
df[df['col'] > 100]

# Data cleaning
df.dropna(), df.fillna(0), df.drop_duplicates()
df['col'] = df['col'].replace({'N/A': None})
df.columns = [c.lower().strip() for c in df.columns]

# Type conversion
df['date'] = pd.to_datetime(df['date'])
df['col'] = df['col'].astype(int)

# Feature engineering
df['new_col'] = df['col1'] / df['col2']
df['year'] = df['date'].dt.year

# Grouping & Aggregation
df.groupby('col').agg({'val': ['mean', 'sum']})
df.pivot_table(index='A', columns='B', values='C', aggfunc='sum')
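
A tiny worked example (with a made-up sales table) showing how the groupby and pivot_table calls above fit together:

sales = pd.DataFrame({
    'region': ['North', 'North', 'South', 'South'],
    'product': ['A', 'B', 'A', 'B'],
    'revenue': [100, 150, 80, 120],
})
sales.groupby('region').agg({'revenue': ['mean', 'sum']})
sales.pivot_table(index='region', columns='product', values='revenue', aggfunc='sum')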

🔢 3. Numerical Computing with NumPy

import numpy as np

arr = np.array([1, 2, 3])
arr.shape, arr.dtype
arr.reshape(3, 1), arr.flatten()

# Operations
arr.mean(), arr.std(), np.median(arr)
np.dot(arr1, arr2)
np.where(arr > 2, 1, 0)
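
A quick self-contained example of element-wise math, the dot product, and conditional selection:

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr1 * arr2                  # element-wise: array([ 4, 10, 18])
np.dot(arr1, arr2)           # 1*4 + 2*5 + 3*6 = 32
np.where(arr1 > 2, 1, 0)     # array([0, 0, 1])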

📈 4. Visualization

Matplotlib

import matplotlib.pyplot as plt

plt.plot(df['x'], df['y'])
plt.bar(df['cat'], df['val'])
plt.hist(df['val'], bins=20)
plt.scatter(df['x'], df['y'])

plt.title("Title"), plt.xlabel("X"), plt.ylabel("Y")
plt.legend(), plt.grid(), plt.show()
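
To keep a chart, save it before (or instead of) calling plt.show(); filename and dpi here are just examples:

plt.plot(df['x'], df['y'], label='y over x')
plt.title("Title"), plt.legend()
plt.tight_layout()
plt.savefig('plot.png', dpi=150)   # write the current figure to disk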

Seaborn

import seaborn as sns

sns.boxplot(data=df, x='category', y='value')
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
sns.pairplot(df, hue='target')

Plotly (Interactive)

import plotly.express as px

px.scatter(df, x='x', y='y', color='label')
px.bar(df, x='category', y='value')
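
Plotly Express calls return a Figure object; outside a notebook you typically show or export it explicitly (a short sketch, the filename is illustrative):

fig = px.scatter(df, x='x', y='y', color='label')
fig.show()                        # open interactive chart
fig.write_html('scatter.html')    # export a standalone HTML file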

🧹 5. Data Preprocessing

from sklearn.preprocessing import StandardScaler, LabelEncoder

# Standardize
scaler = StandardScaler()
df[['num1', 'num2']] = scaler.fit_transform(df[['num1', 'num2']])

# Encode categorical
le = LabelEncoder()
df['cat_encoded'] = le.fit_transform(df['category'])

# One-hot encoding
df = pd.get_dummies(df, columns=['category'])
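
Note that LabelEncoder is designed for target labels; for feature columns, get_dummies or OneHotEncoder is usually the safer choice. A minimal sketch combining scaling and one-hot encoding in one step (column names are placeholders):

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

preprocess = ColumnTransformer([
    ('num', StandardScaler(), ['num1', 'num2']),                     # scale numeric columns
    ('cat', OneHotEncoder(handle_unknown='ignore'), ['category']),   # one-hot encode categoricals
])
X_processed = preprocess.fit_transform(df)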

🧠 6. Machine Learning with Scikit-learn

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Split
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Predict & Evaluate
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
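
A single split can be noisy; cross-validation gives a steadier estimate. A short sketch reusing X and y from above:

from sklearn.model_selection import cross_val_score

scores = cross_val_score(RandomForestClassifier(), X, y, cv=5)   # 5-fold CV
print(scores.mean(), scores.std())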

🔍 7. Exploratory Data Analysis (EDA)

df.describe(include='all')
df['target'].value_counts()
df.corr(numeric_only=True)

sns.heatmap(df.corr(numeric_only=True), annot=True)
sns.pairplot(df)
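
A few more quick checks worth running early on the same df:

df.isna().sum()          # missing values per column
df.dtypes                # column data types
df.duplicated().sum()    # number of duplicate rows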

🧪 8. Statistical Analysis

from scipy import stats

# t-test
stats.ttest_ind(df[df['group'] == 'A']['score'], df[df['group'] == 'B']['score'])

# Chi-squared test
from scipy.stats import chi2_contingency
chi2_contingency(pd.crosstab(df['gender'], df['response']))
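
Both tests return a test statistic and a p-value; a sketch of unpacking and reading them (the 0.05 cut-off is a common convention, not a rule):

t_stat, p_value = stats.ttest_ind(df[df['group'] == 'A']['score'],
                                  df[df['group'] == 'B']['score'])
if p_value < 0.05:
    print("Group means differ significantly")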

📂 9. Working with Files & SQL

# CSV/Excel/JSON
df.to_csv('cleaned.csv', index=False)
df.to_excel('data.xlsx', index=False)

# SQL
import sqlite3
conn = sqlite3.connect('data.db')
df.to_sql('table_name', conn, if_exists='replace')

# Querying from SQL
pd.read_sql('SELECT * FROM table_name', conn)
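
Queries can also be parameterized through pandas ('?' is SQLite's placeholder style; other drivers differ, and 'value' here is a placeholder column). Close the connection when done:

pd.read_sql('SELECT * FROM table_name WHERE value > ?', conn, params=(100,))
conn.close()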

🌐 10. Web Scraping (Bonus)

import requests
from bs4 import BeautifulSoup

url = "https://example.com"
res = requests.get(url)
soup = BeautifulSoup(res.text, 'html.parser')
titles = soup.find_all('h2')
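
A small follow-up that fails loudly on HTTP errors and pulls out text and links (which tags to target depends entirely on the page):

res.raise_for_status()                                 # error out on 4xx/5xx responses
headings = [t.get_text(strip=True) for t in titles]    # plain text of each <h2>
links = [a['href'] for a in soup.find_all('a', href=True)]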

📌 11. Time Series Basics

df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

df.resample('M').mean()
df['rolling_mean'] = df['value'].rolling(window=3).mean()
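
A self-contained mini example with synthetic daily data, showing resample and rolling together:

ts = pd.DataFrame({'value': range(90)},
                  index=pd.date_range('2024-01-01', periods=90, freq='D'))
ts.resample('M').mean()                    # monthly averages
ts['value'].rolling(window=7).mean()       # 7-day rolling mean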

🧰 12. Useful Utilities

# Progress bar
from tqdm import tqdm
for i in tqdm(range(1000)): ...

# Profiling (pandas-profiling has been renamed to ydata-profiling)
pip install ydata-profiling
from ydata_profiling import ProfileReport
profile = ProfileReport(df)
profile.to_file("eda_report.html")
