osm-labo/find_largest_commit.sh

222 lines
6.7 KiB
Bash
Raw Normal View History

2025-08-31 17:57:28 +02:00
#!/bin/bash
# Script to find which commit made the biggest change in repository size
#
# This script analyzes git commit history to determine which commit caused
# the largest change in the repository's size. It checks out each commit,
# measures the repository size, and identifies the commit with the biggest
# size difference.
#
# Usage: ./find_largest_commit.sh [number_of_commits_to_check]
#
# Arguments:
# number_of_commits_to_check: Optional. Number of recent commits to analyze.
# Defaults to 100 if not specified.
#
# Output:
# - Detailed information about the commit with the largest size change
# - A CSV file with data for all analyzed commits
#
# Requirements:
# - git
# - bc (for floating-point calculations)
# - du (GNU coreutils version; the -b and --exclude options are used to measure sizes)
#
# Author: Junie (JetBrains AI)
# Date: 2025-08-31
# Abort the script as soon as any command returns a non-zero status
set -o errexit
# Register cleanup() to run on every exit path, including unexpected ones.
# The function is looked up when the trap fires, so it may be defined later.
trap 'cleanup' EXIT
# Restore the repository to the state it was in before the analysis.
#
# Runs from the EXIT trap. It is strictly best effort: it never aborts and
# always returns 0, but it reports anything it could not restore so the user
# is not silently left on another commit or with work stranded in the stash.
#
# Globals:
#   CURRENT_BRANCH (read) - ref to return to; empty/unset means the script
#                           exited before recording it, so nothing is done
#   STASH_NEEDED   (read) - "true" when the script stashed local changes
# Outputs:
#   Progress message on stdout, warnings on stderr
cleanup() {
  # Nothing to restore if the starting point was never recorded
  if [ -z "${CURRENT_BRANCH:-}" ]; then
    return 0
  fi

  # Make sure we return to the original branch
  if ! git checkout -q "$CURRENT_BRANCH" 2>/dev/null; then
    echo "Warning: could not return to '$CURRENT_BRANCH'." >&2
    if [ "${STASH_NEEDED:-false}" = true ]; then
      # Popping the stash onto a different commit could conflict, so leave
      # it in place and tell the user how to finish the restore by hand.
      echo "Your uncommitted changes are still stashed; run: git checkout $CURRENT_BRANCH && git stash pop" >&2
    else
      echo "To return manually, run: git checkout $CURRENT_BRANCH" >&2
    fi
    return 0
  fi

  # Restore stashed changes if needed
  if [ "${STASH_NEEDED:-false}" = true ]; then
    echo "Restoring stashed changes..."
    if ! git stash pop -q 2>/dev/null; then
      echo "Warning: could not restore stashed changes; they are still in the stash. Run 'git stash pop' manually." >&2
    fi
  fi
  return 0
}
# Number of commits to inspect: first argument, or 100 when none is given
NUM_COMMITS=${1:-100}

# Validate input: digits only, and at least 1.
# Both failure modes share one exit path; only the first line of the
# message differs.
VALIDATION_ERROR=""
if [[ ! "$NUM_COMMITS" =~ ^[0-9]+$ ]]; then
  VALIDATION_ERROR="Error: Number of commits must be a positive integer."
elif [ "$NUM_COMMITS" -lt 1 ]; then
  VALIDATION_ERROR="Error: Number of commits must be at least 1."
fi
if [ -n "$VALIDATION_ERROR" ]; then
  printf '%s\n' "$VALIDATION_ERROR" "Usage: $0 [number_of_commits_to_check]"
  exit 1
fi

# Tell the user what is about to happen (the trailing empty string prints
# the blank separator line)
printf '%s\n' \
  "Analyzing the last $NUM_COMMITS commits to find the largest size change..." \
  "This may take some time depending on repository size and history." \
  ""
# Get the list of commit hashes, oldest first.
# -n selects the most recent $NUM_COMMITS commits and --reverse then flips the
# order in which that selection is printed. The processing loop uses the size
# of the previously processed commit as "size before", so the commits have to
# be visited in chronological order; with git's default newest-first order
# every change would be attributed to the wrong commit with an inverted sign.
COMMITS=$(git log --reverse --pretty=format:"%H" -n "$NUM_COMMITS")
# Initialize variables to track the largest change
LARGEST_CHANGE=0       # absolute size difference in bytes
LARGEST_COMMIT=""      # hash of the commit with that difference
LARGEST_SIZE_BEFORE=0  # size in bytes before that commit
LARGEST_SIZE_AFTER=0   # size in bytes after that commit
# Function to get repository size at a specific commit
#
# Checks out the given commit in the current working tree and prints the
# size of the tree in bytes (excluding the .git directory).
#
# Arguments:
#   $1 - commit to check out
# Outputs:
#   Size in bytes on stdout; error messages on stderr
# Returns:
#   0 on success, 1 if the checkout fails or du does not yield a number
get_repo_size() {
  local commit=$1
  local size

  # This function is called inside $(...), where bash (outside POSIX mode)
  # turns `set -e` off, so a failed checkout has to be handled explicitly;
  # otherwise the size of whatever tree is currently checked out would be
  # reported for this commit.
  if ! git checkout -q "$commit"; then
    echo "Error: could not check out commit $commit" >&2
    return 1
  fi

  # Calculate size in bytes (excluding .git directory).
  # du prints "<bytes><TAB><path>"; cut keeps the number.
  size=$(du -sb --exclude=.git . | cut -f1)
  if ! [[ "$size" =~ ^[0-9]+$ ]]; then
    echo "Error: could not measure repository size at commit $commit" >&2
    return 1
  fi
  echo "$size"
}
# Store current branch to return to it later.
# On a detached HEAD, --abbrev-ref prints the literal string "HEAD"; checking
# that out afterwards would leave the repository on the last analyzed commit,
# so remember the commit hash instead.
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
if [ "$CURRENT_BRANCH" = "HEAD" ]; then
  CURRENT_BRANCH=$(git rev-parse HEAD)
fi

# Check if there are uncommitted changes and stash them.
# STASH_NEEDED is read by cleanup(). It becomes true only when a new stash
# entry really exists: `git stash push` creates none when, for example, only
# untracked files are present, and popping in that case would apply an
# unrelated, older stash.
STASH_NEEDED=false
if [[ -n $(git status -s) ]]; then
  echo "Stashing uncommitted changes before proceeding..."
  # refs/stash points at the newest stash entry (absent when there is none)
  STASH_REF_BEFORE=$(git rev-parse -q --verify refs/stash || true)
  git stash push -m "Temporary stash by find_largest_commit.sh script"
  STASH_REF_AFTER=$(git rev-parse -q --verify refs/stash || true)
  if [ "$STASH_REF_BEFORE" != "$STASH_REF_AFTER" ]; then
    STASH_NEEDED=true
  fi
fi

# Temporary file to store results (CSV; intentionally kept after the run)
TEMP_FILE=$(mktemp)
echo "Commit Hash,Author,Date,Size Before (bytes),Size After (bytes),Change (bytes),Change (%),Message" > "$TEMP_FILE"

# Counter for progress display
COUNTER=0
TOTAL_COMMITS=$(echo "$COMMITS" | wc -l)

# Process each commit in the order COMMITS lists them. Each commit's size is
# compared with the size measured for the commit processed just before it.
# COMMITS is left unquoted on purpose: it holds one hash per line.
PREV_SIZE=""
for COMMIT in $COMMITS; do
  COUNTER=$((COUNTER + 1))
  printf 'Processing commit %s/%s...\r' "$COUNTER" "$TOTAL_COMMITS"

  # Get commit details. Commas would break the CSV columns, so they are
  # replaced with semicolons in the free-text fields.
  AUTHOR=$(git show -s --format="%an" "$COMMIT")
  AUTHOR=${AUTHOR//,/;}
  DATE=$(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$COMMIT")
  MESSAGE=$(git show -s --format="%s" "$COMMIT")
  MESSAGE=${MESSAGE//,/;}

  # Get size after this commit (checks the commit out as a side effect)
  SIZE_AFTER=$(get_repo_size "$COMMIT")

  # The first processed commit only provides the baseline size
  if [ -z "$PREV_SIZE" ]; then
    PREV_SIZE="$SIZE_AFTER"
    continue
  fi

  # Size before is the size measured for the previously processed commit
  SIZE_BEFORE="$PREV_SIZE"
  PREV_SIZE="$SIZE_AFTER"

  # Calculate change
  CHANGE=$((SIZE_AFTER - SIZE_BEFORE))
  ABS_CHANGE=${CHANGE#-} # Absolute value: strip a leading minus sign

  # Calculate percentage change; undefined when the previous size was 0
  if [ "$SIZE_BEFORE" -ne 0 ]; then
    PERCENT_CHANGE="$(echo "scale=2; 100 * $CHANGE / $SIZE_BEFORE" | bc)%"
  else
    PERCENT_CHANGE="N/A"
  fi

  # Record the data
  echo "$COMMIT,$AUTHOR,$DATE,$SIZE_BEFORE,$SIZE_AFTER,$CHANGE,$PERCENT_CHANGE,$MESSAGE" >> "$TEMP_FILE"

  # Check if this is the largest change so far
  if [ "$ABS_CHANGE" -gt "$LARGEST_CHANGE" ]; then
    LARGEST_CHANGE="$ABS_CHANGE"
    LARGEST_COMMIT="$COMMIT"
    LARGEST_SIZE_BEFORE="$SIZE_BEFORE"
    LARGEST_SIZE_AFTER="$SIZE_AFTER"
  fi
done

# Return to the original branch
# (Cleanup function will handle restoring stashed changes)
git checkout -q "$CURRENT_BRANCH"
printf '\nAnalysis complete!\n'
# Function to format size in human-readable format
#
# Arguments:
#   $1 - size in bytes
# Outputs:
#   The size scaled to the largest fitting binary unit (GB, MB or KB) with
#   two decimals as computed by bc, or "<n> bytes" below 1024
format_size() {
  local bytes=$1
  local divisor unit

  # Pick the unit first; anything under 1 KB is printed as-is
  if [ "$bytes" -ge 1073741824 ]; then
    divisor=1073741824
    unit="GB"
  elif [ "$bytes" -ge 1048576 ]; then
    divisor=1048576
    unit="MB"
  elif [ "$bytes" -ge 1024 ]; then
    divisor=1024
    unit="KB"
  else
    echo "$bytes bytes"
    return
  fi

  # bc does the division so the result keeps two decimal places
  echo "$(echo "scale=2; $bytes / $divisor" | bc) $unit"
}
# Display the result.
# LARGEST_COMMIT is only set by a commit whose absolute size change is
# greater than 0, so an empty value means that no size change was recorded.
if [ -n "$LARGEST_COMMIT" ]; then
  echo
  echo "Commit with the largest size change:"
  echo "-----------------------------------"
  echo "Commit: $LARGEST_COMMIT"
  echo "Author: $(git show -s --format="%an" "$LARGEST_COMMIT")"
  echo "Date: $(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$LARGEST_COMMIT")"
  echo "Message: $(git show -s --format="%s" "$LARGEST_COMMIT")"
  echo
  echo "Size before: $(format_size "$LARGEST_SIZE_BEFORE")"
  echo "Size after: $(format_size "$LARGEST_SIZE_AFTER")"

  # format_size gets the absolute value; the sign is printed separately
  CHANGE=$((LARGEST_SIZE_AFTER - LARGEST_SIZE_BEFORE))
  if [ "$CHANGE" -ge 0 ]; then
    echo "Change: +$(format_size "${CHANGE#-}") (increased)"
  else
    echo "Change: -$(format_size "${CHANGE#-}") (decreased)"
  fi

  # A percentage is undefined when the size before was 0
  if [ "$LARGEST_SIZE_BEFORE" -ne 0 ]; then
    PERCENT_CHANGE=$(echo "scale=2; 100 * $CHANGE / $LARGEST_SIZE_BEFORE" | bc)
    echo "Percentage change: $PERCENT_CHANGE%"
  fi

  echo
  echo "Files changed in this commit:"
  # Get the list of changed files: keep the per-file "name | count" lines of
  # the stat output and sort them by the third field, largest first
  CHANGED_FILES=$(git show --stat "$LARGEST_COMMIT" | grep '|' | sort -rn -k3)
  if [ -z "$CHANGED_FILES" ]; then
    # Counting lines of an empty string would report one file
    echo "(none reported by git show --stat)"
  else
    TOTAL_FILES=$(echo "$CHANGED_FILES" | wc -l)
    # If there are too many files, show only the top 10 with the most changes
    if [ "$TOTAL_FILES" -gt 10 ]; then
      echo "$CHANGED_FILES" | head -n 10
      echo "... and $((TOTAL_FILES - 10)) more files (total: $TOTAL_FILES files changed)"
    else
      echo "$CHANGED_FILES"
    fi
  fi
else
  echo "No size change detected among the analyzed commits."
fi

echo
echo "Full results saved to: $TEMP_FILE"
echo "You can import this CSV file into a spreadsheet for further analysis."

# Make the script executable.
# Best effort only: under `set -e` a failing chmod (read-only location, file
# owned by someone else, $0 not being a file path) would otherwise turn a
# successful analysis into a failed run.
if [ -f "$0" ] && [ ! -x "$0" ]; then
  chmod +x "$0" 2>/dev/null || true
fi