#!/bin/bash
# Script to find which commit made the biggest change in repository size
#
# This script analyzes git commit history to determine which commit caused
# the largest change in the size of the checked-out working tree. It checks
# out each commit (oldest first), measures the tree with `du`, and reports
# the commit with the biggest absolute size difference from the commit
# processed immediately before it.
#
# Usage: ./find_largest_commit.sh [number_of_commits_to_check]
#
# Arguments:
#   number_of_commits_to_check: Optional. Number of recent commits to analyze.
#                               Defaults to 100 if not specified.
#
# Output:
#   - Detailed information about the commit with the largest size change
#   - A CSV file (created with mktemp) with data for all analyzed commits
#
# Requirements:
#   - git
#   - bc (for floating-point calculations)
#   - du supporting -b and --exclude (GNU coreutils)
#
# Notes:
#   - The oldest commit in the analyzed range only provides the baseline
#     size; it has no row of its own in the CSV.
#   - Each commit is compared with the commit that precedes it in reversed
#     `git log` order. For linear history that is its parent; for history
#     with merges the neighbour may be a commit from another branch.
#   - Uncommitted and untracked changes are stashed for the duration of the
#     run and restored afterwards. Ignored files stay on disk and are
#     counted by `du` for every commit.
#
# Author: Junie (JetBrains AI)
# Date: 2025-08-31

# Exit on error
set -e

# State shared with cleanup(); everything else lives in main().
ORIGINAL_REF=""       # branch name, or commit hash when HEAD was detached
STASH_CREATED=false   # true only if this run really created a stash entry
STATE_RESTORED=false  # makes cleanup() safe to call more than once

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Return the working tree to where the user started and restore any stash
# created by this run. Runs on every exit path via the EXIT trap and is also
# called explicitly once the analysis loop has finished.
cleanup() {
  if [[ "$STATE_RESTORED" == true ]]; then
    return 0
  fi
  STATE_RESTORED=true

  if [[ -n "$ORIGINAL_REF" ]]; then
    if ! git checkout -q "$ORIGINAL_REF" 2>/dev/null; then
      printf 'Warning: could not check out %s again.\n' "$ORIGINAL_REF" >&2
      if [[ "$STASH_CREATED" == true ]]; then
        # Popping onto a different commit could conflict, so leave it alone.
        printf 'Your changes are still in the stash; run "git stash pop" after fixing this.\n' >&2
      fi
      return 0
    fi
  fi

  if [[ "$STASH_CREATED" == true ]]; then
    echo "Restoring stashed changes..."
    if ! git stash pop -q; then
      printf 'Warning: "git stash pop" failed; your changes are still in the stash.\n' >&2
    fi
  fi
}

# Trap for cleanup in case of unexpected exit
trap cleanup EXIT

# Check out a commit and print the size of the working tree in bytes
# (excluding the .git directory).
# Arguments: $1 - commit hash to check out
# Outputs:   size in bytes on stdout
# Returns:   non-zero if the checkout or the measurement fails
get_repo_size() {
  local commit=$1
  local size

  # `set -e` is not active inside $(...), so failures are checked by hand.
  git checkout -q "$commit" || return 1

  size=$(du -sb --exclude=.git . | cut -f1) || return 1
  # The pipeline status is cut's, so confirm du really produced a number.
  [[ "$size" =~ ^[0-9]+$ ]] || return 1

  echo "$size"
}

# Format a non-negative byte count in human-readable form.
# Arguments: $1 - size in bytes
# Outputs:   e.g. "1.50 MB" or "512 bytes" on stdout
format_size() {
  local size=$1

  if [ "$size" -ge 1073741824 ]; then
    echo "$(echo "scale=2; $size / 1073741824" | bc) GB"
  elif [ "$size" -ge 1048576 ]; then
    echo "$(echo "scale=2; $size / 1048576" | bc) MB"
  elif [ "$size" -ge 1024 ]; then
    echo "$(echo "scale=2; $size / 1024" | bc) KB"
  else
    echo "$size bytes"
  fi
}

# Print the report for the commit with the largest size change.
# Arguments: $1 - commit hash, $2 - size before (bytes), $3 - size after (bytes)
print_largest_commit() {
  local commit=$1
  local size_before=$2
  local size_after=$3
  local change percent changed_files total_files

  echo
  echo "Commit with the largest size change:"
  echo "-----------------------------------"
  echo "Commit: $commit"
  echo "Author: $(git show -s --format="%an" "$commit")"
  echo "Date: $(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$commit")"
  echo "Message: $(git show -s --format="%s" "$commit")"
  echo
  echo "Size before: $(format_size "$size_before")"
  echo "Size after: $(format_size "$size_after")"

  change=$((size_after - size_before))
  if [ "$change" -ge 0 ]; then
    echo "Change: +$(format_size "${change#-}") (increased)"
  else
    echo "Change: -$(format_size "${change#-}") (decreased)"
  fi

  if [ "$size_before" -ne 0 ]; then
    percent=$(echo "scale=2; 100 * $change / $size_before" | bc)
    echo "Percentage change: $percent%"
  fi

  echo
  echo "Files changed in this commit:"
  # --format= drops the commit header so that a '|' inside the commit message
  # cannot be mistaken for a diffstat line.
  changed_files=$(git show --stat --format= "$commit" | grep '|' | sort -rn -k3)

  if [[ -z "$changed_files" ]]; then
    echo "(no per-file statistics available)"
    return 0
  fi

  total_files=$(grep -c '' <<<"$changed_files")
  # If there are too many files, show only the top 10 with the most changes
  if [ "$total_files" -gt 10 ]; then
    echo "$changed_files" | head -n 10
    echo "... and $((total_files - 10)) more files (total: $total_files files changed)"
  else
    echo "$changed_files"
  fi
}

# Entry point.
# Arguments: $1 - optional number of recent commits to analyze (default 100)
main() {
  # Default to checking the last 100 commits if not specified
  local num_commits=${1:-100}
  local tool repo_root commit_list line
  local stash_before stash_after temp_file
  local -a commits=()
  local counter=0 total_commits
  local commit author date message
  local prev_size="" size_before size_after change abs_change percent_change
  local largest_change=0 largest_commit=""
  local largest_size_before=0 largest_size_after=0

  # Validate input
  if ! [[ "$num_commits" =~ ^[0-9]+$ ]]; then
    echo "Error: Number of commits must be a positive integer." >&2
    echo "Usage: $0 [number_of_commits_to_check]" >&2
    exit 1
  fi
  # Force base 10 so an argument such as "08" is not parsed as octal.
  num_commits=$((10#$num_commits))
  if ((num_commits < 1)); then
    echo "Error: Number of commits must be at least 1." >&2
    echo "Usage: $0 [number_of_commits_to_check]" >&2
    exit 1
  fi

  for tool in git bc du; do
    command -v "$tool" >/dev/null 2>&1 \
      || die "required tool '$tool' was not found in PATH."
  done

  [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" == "true" ]] \
    || die "this script must be run inside a git working tree."

  # Measure from the repository root: `du .` in a subdirectory would cover
  # only part of the tree, and older commits may not contain that directory.
  repo_root=$(git rev-parse --show-toplevel) || die "could not locate the repository root."
  cd "$repo_root" || die "could not change directory to $repo_root."

  echo "Analyzing the last $num_commits commits to find the largest size change..."
  echo "This may take some time depending on repository size and history."
  echo

  # Get the list of commit hashes, oldest first, so that the size measured in
  # the previous iteration really is the size *before* the current commit.
  commit_list=$(git log --reverse --pretty=format:"%H" -n "$num_commits") \
    || die "could not read the commit history."
  [[ -n "$commit_list" ]] || die "no commits found."
  while IFS= read -r line; do
    commits+=("$line")
  done <<<"$commit_list"
  total_commits=${#commits[@]}

  # Remember where to return to. On a detached HEAD there is no branch name,
  # so fall back to the commit hash.
  ORIGINAL_REF=$(git symbolic-ref --quiet --short HEAD) \
    || ORIGINAL_REF=$(git rev-parse HEAD)

  # Stash uncommitted work, including untracked files, so checkouts cannot
  # fail or overwrite anything. Compare refs/stash before and after: only a
  # stash created by this run may be popped later.
  if [[ -n "$(git status --porcelain)" ]]; then
    echo "Stashing uncommitted changes before proceeding..."
    stash_before=$(git rev-parse -q --verify refs/stash || true)
    git stash push -u -m "Temporary stash by find_largest_commit.sh script"
    stash_after=$(git rev-parse -q --verify refs/stash || true)
    if [[ "$stash_after" != "$stash_before" ]]; then
      STASH_CREATED=true
    fi
  fi

  # Temporary file to store results
  temp_file=$(mktemp) || die "could not create a temporary file."
  echo "Commit Hash,Author,Date,Size Before (bytes),Size After (bytes),Change (bytes),Change (%),Message" >"$temp_file"

  # Process each commit
  for commit in "${commits[@]}"; do
    counter=$((counter + 1))
    printf 'Processing commit %d/%d...\r' "$counter" "$total_commits"

    # Get size after this commit
    size_after=$(get_repo_size "$commit") \
      || die "could not check out or measure commit $commit."

    # The oldest commit in the range only provides the baseline size.
    if [[ -z "$prev_size" ]]; then
      prev_size=$size_after
      continue
    fi

    size_before=$prev_size
    prev_size=$size_after

    # Calculate change
    change=$((size_after - size_before))
    abs_change=${change#-} # Absolute value

    # Calculate percentage change
    if [ "$size_before" -ne 0 ]; then
      percent_change="$(echo "scale=2; 100 * $change / $size_before" | bc)%"
    else
      percent_change="N/A"
    fi

    # Get commit details; commas would break the CSV columns, so replace
    # them with semicolons in the free-text fields.
    author=$(git show -s --format="%an" "$commit")
    date=$(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$commit")
    message=$(git show -s --format="%s" "$commit")
    author=${author//,/;}
    message=${message//,/;}

    # Record the data
    printf '%s\n' \
      "$commit,$author,$date,$size_before,$size_after,$change,$percent_change,$message" \
      >>"$temp_file"

    # Check if this is the largest change so far
    if [ "$abs_change" -gt "$largest_change" ]; then
      largest_change=$abs_change
      largest_commit=$commit
      largest_size_before=$size_before
      largest_size_after=$size_after
    fi
  done

  # Return to the original branch and restore stashed changes now; the EXIT
  # trap becomes a no-op afterwards.
  cleanup

  echo -e "\nAnalysis complete!"

  # Display the result
  if [ -n "$largest_commit" ]; then
    print_largest_commit "$largest_commit" "$largest_size_before" "$largest_size_after"
  else
    echo "No commits analyzed."
  fi

  echo
  echo "Full results saved to: $temp_file"
  echo "You can import this CSV file into a spreadsheet for further analysis."
}

main "$@"