222 lines
6.7 KiB
Bash
222 lines
6.7 KiB
Bash
![]() |
#!/bin/bash
|
||
|
|
||
|
# Script to find which commit made the biggest change in repository size
|
||
|
#
|
||
|
# This script analyzes git commit history to determine which commit caused
|
||
|
# the largest change in the repository's size. It checks out each commit,
|
||
|
# measures the repository size, and identifies the commit with the biggest
|
||
|
# size difference.
|
||
|
#
|
||
|
# Usage: ./find_largest_commit.sh [number_of_commits_to_check]
|
||
|
#
|
||
|
# Arguments:
|
||
|
# number_of_commits_to_check: Optional. Number of recent commits to analyze.
|
||
|
# Defaults to 100 if not specified.
|
||
|
#
|
||
|
# Output:
|
||
|
# - Detailed information about the commit with the largest size change
|
||
|
# - A CSV file with data for all analyzed commits
|
||
|
#
|
||
|
# Requirements:
|
||
|
# - git
|
||
|
# - bc (for floating-point calculations)
|
||
|
# - du (for measuring directory sizes)
|
||
|
#
|
||
|
# Author: Junie (JetBrains AI)
|
||
|
# Date: 2025-08-31
|
||
|
|
||
|
# Abort the script as soon as an unguarded command returns non-zero.
set -o errexit

# Run the cleanup function on every exit path, normal or not. The handler
# name is resolved when the trap fires, so the function may be defined after
# this line.
trap 'cleanup' EXIT
|
||
|
|
||
|
#######################################
# Put the repository back the way it was before the analysis started.
# Installed as the EXIT trap handler, so it runs on every exit path and may
# run more than once; it is written to be safe in both situations.
# Globals:
#   CURRENT_BRANCH (read)       - ref to check out again; empty/unset means
#                                 the working tree was never touched
#   STASH_NEEDED   (read/write) - "true" while a stash made by this script is
#                                 still waiting to be popped
# Outputs:
#   Progress message on stdout, warnings on stderr.
# Returns:
#   Always 0; restoring is best-effort and must not mask the exit status.
#######################################
cleanup() {
  # Nothing recorded yet (e.g. argument validation failed): nothing to undo.
  if [ -z "${CURRENT_BRANCH:-}" ]; then
    return 0
  fi

  # Make sure we return to the original branch. A failure is reported instead
  # of being silently discarded so the user knows HEAD may have moved.
  if ! git checkout -q "$CURRENT_BRANCH" 2>/dev/null; then
    echo "Warning: could not switch back to '$CURRENT_BRANCH'." >&2
  fi

  # Restore stashed changes if needed.
  if [ "${STASH_NEEDED:-false}" = true ]; then
    echo "Restoring stashed changes..."
    if git stash pop -q 2>/dev/null; then
      # Remember the pop succeeded so a second invocation cannot pop an
      # unrelated, older stash entry.
      STASH_NEEDED=false
    else
      echo "Warning: could not restore the stashed changes automatically." >&2
      echo "They are still in the stash; run 'git stash pop' to recover them." >&2
    fi
  fi

  return 0
}
|
||
|
|
||
|
# Number of recent commits to analyze; defaults to 100 when no argument is
# given.
NUM_COMMITS=${1:-100}

# Validate input. Diagnostics are written to stderr so they never end up in
# captured or piped output.
if ! [[ "$NUM_COMMITS" =~ ^[0-9]+$ ]]; then
  echo "Error: Number of commits must be a positive integer." >&2
  echo "Usage: $0 [number_of_commits_to_check]" >&2
  exit 1
fi

if [ "$NUM_COMMITS" -lt 1 ]; then
  echo "Error: Number of commits must be at least 1." >&2
  echo "Usage: $0 [number_of_commits_to_check]" >&2
  exit 1
fi

echo "Analyzing the last $NUM_COMMITS commits to find the largest size change..."
echo "This may take some time depending on repository size and history."
echo

# Get the list of commit hashes (git log prints the newest commit first).
# A size change needs two measurements, so one extra commit is requested to
# serve as the baseline; otherwise N commits would yield only N-1 changes and
# a request for a single commit could never produce a result.
# The 10# prefix forces base 10 so input such as "08" is not read as octal.
COMMITS=$(git log --pretty=format:"%H" -n "$((10#$NUM_COMMITS + 1))")

# Initialize variables to track the largest change
LARGEST_CHANGE=0       # absolute size difference in bytes
LARGEST_COMMIT=""      # hash of the commit with that difference
LARGEST_SIZE_BEFORE=0  # size in bytes before that commit
LARGEST_SIZE_AFTER=0   # size in bytes after that commit
|
||
|
|
||
|
#######################################
# Print the size of the working tree, in bytes, at a given commit.
# Checks the commit out and measures the current directory with du, leaving
# out the .git directory.
# Arguments:
#   $1 - commit to check out and measure
# Outputs:
#   The size in bytes on stdout (nothing is printed on failure).
# Returns:
#   0 on success, non-zero if the checkout or the measurement fails.
#######################################
get_repo_size() {
  local commit=$1
  local du_out

  # This function is called inside $(...), where bash turns errexit off, so
  # every step is checked explicitly. Without this, a failed checkout would
  # silently measure whatever tree happens to be checked out.
  git checkout -q "$commit" || return 1

  # Calculate size in bytes (excluding .git directory). Declaration and
  # assignment are separate so du's exit status is not masked by `local`.
  du_out=$(du -sb --exclude=.git .) || return 1

  # du prints "<bytes><TAB><path>"; keep only the byte count.
  printf '%s\n' "${du_out%%$'\t'*}"
}
|
||
|
|
||
|
# Store the current position to return to it later. Prefer the branch name;
# on a detached HEAD there is none (and `rev-parse --abbrev-ref` would only
# give the literal "HEAD", which cannot bring us back), so fall back to the
# commit hash.
CURRENT_BRANCH=$(git symbolic-ref --short -q HEAD || git rev-parse HEAD)

# Check if there are uncommitted changes
STASH_NEEDED=false
if [[ -n $(git status -s) ]]; then
  echo "Stashing uncommitted changes before proceeding..."

  # `git status -s` also lists untracked files, so they are stashed as well.
  # STASH_NEEDED is only set once a new stash entry really exists; otherwise
  # the later `git stash pop` would restore an unrelated, older stash.
  stash_before=$(git rev-parse -q --verify refs/stash || true)
  git stash push --include-untracked -m "Temporary stash by find_largest_commit.sh script"
  stash_after=$(git rev-parse -q --verify refs/stash || true)
  if [ "$stash_after" != "$stash_before" ]; then
    STASH_NEEDED=true
  fi
fi

# Temporary file to store results
TEMP_FILE=$(mktemp)

echo "Commit Hash,Author,Date,Size Before (bytes),Size After (bytes),Change (bytes),Change (%),Message" > "$TEMP_FILE"

# Counter for progress display
COUNTER=0
if [ -n "$COMMITS" ]; then
  # The arithmetic expansion strips the padding some wc implementations add.
  TOTAL_COMMITS=$(($(printf '%s\n' "$COMMITS" | wc -l)))
else
  # An empty list would otherwise be counted as one (empty) line.
  TOTAL_COMMITS=0
fi
|
||
|
|
||
|
# Process each commit, oldest first.
#
# COMMITS is produced by `git log` without --reverse, i.e. newest first. The
# size change introduced by a commit is (size at that commit) minus (size at
# the commit before it), so the list is reversed before walking it. Walking it
# newest-first would use the *newer* size as "before", flipping the sign of
# every change and attributing it to the wrong commit.
# NOTE(review): this treats the list as a linear history; across merges the
# previous list entry is not necessarily a parent of the current one.
COMMITS_OLDEST_FIRST=""
for COMMIT in $COMMITS; do
  COMMITS_OLDEST_FIRST="$COMMIT $COMMITS_OLDEST_FIRST"
done

PREV_SIZE=""
for COMMIT in $COMMITS_OLDEST_FIRST; do
  COUNTER=$((COUNTER + 1))
  echo -ne "Processing commit $COUNTER/$TOTAL_COMMITS...\r"

  # Get size after this commit. The result is validated because a failed
  # checkout or measurement would otherwise surface later as a confusing
  # arithmetic error, or not at all.
  SIZE_AFTER=$(get_repo_size "$COMMIT") || SIZE_AFTER=""
  if ! [[ "$SIZE_AFTER" =~ ^[0-9]+$ ]]; then
    echo >&2
    echo "Error: could not measure repository size at commit $COMMIT." >&2
    exit 1
  fi

  # The first (oldest) commit only provides the baseline size.
  if [ -z "$PREV_SIZE" ]; then
    PREV_SIZE="$SIZE_AFTER"
    continue
  fi

  # Get commit details. Commas are replaced with semicolons in free-text
  # fields so they cannot break the CSV columns.
  AUTHOR=$(git show -s --format="%an" "$COMMIT")
  AUTHOR=${AUTHOR//,/;}
  DATE=$(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$COMMIT")
  MESSAGE=$(git show -s --format="%s" "$COMMIT")
  MESSAGE=${MESSAGE//,/;}

  # Size before is the size measured at the previous (older) commit.
  SIZE_BEFORE="$PREV_SIZE"
  PREV_SIZE="$SIZE_AFTER"

  # Calculate change
  CHANGE=$((SIZE_AFTER - SIZE_BEFORE))
  ABS_CHANGE=${CHANGE#-} # Absolute value

  # Calculate percentage change; undefined when the previous size was zero.
  if [ "$SIZE_BEFORE" -ne 0 ]; then
    PERCENT_CHANGE=$(echo "scale=2; 100 * $CHANGE / $SIZE_BEFORE" | bc)
    PERCENT_FIELD="$PERCENT_CHANGE%"
  else
    PERCENT_CHANGE="N/A"
    PERCENT_FIELD="N/A" # no "%" suffix on a non-number
  fi

  # Record the data
  echo "$COMMIT,$AUTHOR,$DATE,$SIZE_BEFORE,$SIZE_AFTER,$CHANGE,$PERCENT_FIELD,$MESSAGE" >> "$TEMP_FILE"

  # Check if this is the largest change so far
  if [ "$ABS_CHANGE" -gt "$LARGEST_CHANGE" ]; then
    LARGEST_CHANGE="$ABS_CHANGE"
    LARGEST_COMMIT="$COMMIT"
    LARGEST_SIZE_BEFORE="$SIZE_BEFORE"
    LARGEST_SIZE_AFTER="$SIZE_AFTER"
  fi
done
|
||
|
|
||
|
# The walk through history is over: go back to where we started.
# (Stashed changes are restored afterwards by the cleanup function.)
git checkout -q "$CURRENT_BRANCH"

printf '\n%s\n' "Analysis complete!"
|
||
|
|
||
|
#######################################
# Format a byte count in human-readable form.
# Uses binary units (1 KB = 1024 bytes) and two decimals, truncated rather
# than rounded. Only shell arithmetic is used, so no external process is
# started per call.
# Arguments:
#   $1 - size in bytes (non-negative integer; defaults to 0 when omitted)
# Outputs:
#   "<n>.<dd> GB", "<n>.<dd> MB", "<n>.<dd> KB" or "<n> bytes" on stdout.
#   A value that is not a plain non-negative integer is printed unchanged
#   followed by " bytes".
# Returns:
#   0
#######################################
format_size() {
  local size=${1:-0}
  local divisor unit

  # Anything that is not a plain non-negative integer cannot be scaled.
  if ! [[ "$size" =~ ^[0-9]+$ ]]; then
    echo "$size bytes"
    return 0
  fi

  # Force base 10 so leading zeros are not interpreted as octal.
  size=$((10#$size))

  if ((size >= 1073741824)); then
    divisor=1073741824
    unit=GB
  elif ((size >= 1048576)); then
    divisor=1048576
    unit=MB
  elif ((size >= 1024)); then
    divisor=1024
    unit=KB
  else
    echo "$size bytes"
    return 0
  fi

  # Integer part, then the remainder scaled to hundredths (truncating).
  printf '%d.%02d %s\n' \
    "$((size / divisor))" \
    "$(((size % divisor) * 100 / divisor))" \
    "$unit"
}
|
||
|
|
||
|
# Display the result. LARGEST_COMMIT is only set when some commit changed the
# measured size by a non-zero amount.
if [ -n "$LARGEST_COMMIT" ]; then
  echo
  echo "Commit with the largest size change:"
  echo "-----------------------------------"
  echo "Commit: $LARGEST_COMMIT"
  echo "Author: $(git show -s --format="%an" "$LARGEST_COMMIT")"
  echo "Date: $(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$LARGEST_COMMIT")"
  echo "Message: $(git show -s --format="%s" "$LARGEST_COMMIT")"
  echo
  echo "Size before: $(format_size "$LARGEST_SIZE_BEFORE")"
  echo "Size after: $(format_size "$LARGEST_SIZE_AFTER")"

  # Signed difference; the sign is printed separately from the magnitude.
  CHANGE=$((LARGEST_SIZE_AFTER - LARGEST_SIZE_BEFORE))
  if [ "$CHANGE" -ge 0 ]; then
    echo "Change: +$(format_size "${CHANGE#-}") (increased)"
  else
    echo "Change: -$(format_size "${CHANGE#-}") (decreased)"
  fi

  # A percentage is undefined when the previous size was zero.
  if [ "$LARGEST_SIZE_BEFORE" -ne 0 ]; then
    PERCENT_CHANGE=$(echo "scale=2; 100 * $CHANGE / $LARGEST_SIZE_BEFORE" | bc)
    echo "Percentage change: $PERCENT_CHANGE%"
  fi

  echo
  echo "Files changed in this commit:"

  # Get the list of changed files, most-changed first.
  #  --format=   drops the commit header and message, so a "|" in the message
  #              cannot be mistaken for a stat line.
  #  -t'|' -k2   sorts on the count after the "|", which keeps working for
  #              paths containing spaces and for renames ("a => b").
  CHANGED_FILES=$(git show --stat --format= "$LARGEST_COMMIT" | grep '|' | sort -t'|' -k2,2nr)

  if [ -n "$CHANGED_FILES" ]; then
    TOTAL_FILES=$(printf '%s\n' "$CHANGED_FILES" | grep -c '')
  else
    # An empty list would otherwise be counted as one line.
    TOTAL_FILES=0
  fi

  # If there are too many files, show only the top 10 with the most changes
  if [ "$TOTAL_FILES" -gt 10 ]; then
    echo "$CHANGED_FILES" | head -n 10
    echo "... and $((TOTAL_FILES - 10)) more files (total: $TOTAL_FILES files changed)"
  elif [ "$TOTAL_FILES" -gt 0 ]; then
    echo "$CHANGED_FILES"
  else
    echo "(no per-file statistics available)"
  fi
else
  echo "No commits analyzed."
fi
|
||
|
|
||
|
echo
echo "Full results saved to: $TEMP_FILE"
echo "You can import this CSV file into a spreadsheet for further analysis."

# Make the script executable (best effort). This runs after all results have
# been reported, so a failure here (read-only location, file owned by someone
# else, $0 not being a file) must not turn a successful run into a non-zero
# exit under errexit.
if [ -f "$0" ] && [ ! -x "$0" ]; then
  chmod +x "$0" 2>/dev/null || true
fi
|