up wiki compare
This commit is contained in:
parent
d2936d5730
commit
1535cf8ee3
8 changed files with 1036 additions and 79 deletions
222
find_largest_commit.sh
Executable file
222
find_largest_commit.sh
Executable file
|
@ -0,0 +1,222 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Script to find which commit made the biggest change in repository size
|
||||
#
|
||||
# This script analyzes git commit history to determine which commit caused
|
||||
# the largest change in the repository's size. It checks out each commit,
|
||||
# measures the repository size, and identifies the commit with the biggest
|
||||
# size difference.
|
||||
#
|
||||
# Usage: ./find_largest_commit.sh [number_of_commits_to_check]
|
||||
#
|
||||
# Arguments:
|
||||
# number_of_commits_to_check: Optional. Number of recent commits to analyze.
|
||||
# Defaults to 100 if not specified.
|
||||
#
|
||||
# Output:
|
||||
# - Detailed information about the commit with the largest size change
|
||||
# - A CSV file with data for all analyzed commits
|
||||
#
|
||||
# Requirements:
|
||||
# - git
|
||||
# - bc (for floating-point calculations)
|
||||
# - du (GNU coreutils version; the script relies on its -b and --exclude options)
|
||||
#
|
||||
# Author: Junie (JetBrains AI)
|
||||
# Date: 2025-08-31
|
||||
|
||||
# Exit on error
|
||||
set -e
|
||||
|
||||
# Trap for cleanup in case of unexpected exit
|
||||
trap cleanup EXIT
|
||||
|
||||
cleanup() {
|
||||
# Make sure we return to the original branch
|
||||
if [ -n "$CURRENT_BRANCH" ]; then
|
||||
git checkout -q "$CURRENT_BRANCH" 2>/dev/null || true
|
||||
|
||||
# Restore stashed changes if needed
|
||||
if [ "$STASH_NEEDED" = true ]; then
|
||||
echo "Restoring stashed changes..."
|
||||
git stash pop -q 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# Default to checking the last 100 commits if not specified
|
||||
NUM_COMMITS=${1:-100}
|
||||
|
||||
# Validate input
|
||||
if ! [[ "$NUM_COMMITS" =~ ^[0-9]+$ ]]; then
|
||||
echo "Error: Number of commits must be a positive integer."
|
||||
echo "Usage: $0 [number_of_commits_to_check]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$NUM_COMMITS" -lt 1 ]; then
|
||||
echo "Error: Number of commits must be at least 1."
|
||||
echo "Usage: $0 [number_of_commits_to_check]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Analyzing the last $NUM_COMMITS commits to find the largest size change..."
|
||||
echo "This may take some time depending on repository size and history."
|
||||
echo
|
||||
|
||||
# Get the list of commit hashes
|
||||
COMMITS=$(git log --pretty=format:"%H" -n "$NUM_COMMITS")
|
||||
|
||||
# Initialize variables to track the largest change
|
||||
LARGEST_CHANGE=0
|
||||
LARGEST_COMMIT=""
|
||||
LARGEST_SIZE_BEFORE=0
|
||||
LARGEST_SIZE_AFTER=0
|
||||
|
||||
# Function to get repository size at a specific commit
|
||||
get_repo_size() {
|
||||
local commit=$1
|
||||
# Checkout the commit
|
||||
git checkout -q "$commit"
|
||||
# Calculate size in bytes (excluding .git directory)
|
||||
local size=$(du -sb --exclude=.git . | cut -f1)
|
||||
echo "$size"
|
||||
}
|
||||
|
||||
# Store current branch to return to it later
|
||||
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
|
||||
|
||||
# Check if there are uncommitted changes
|
||||
if [[ -n $(git status -s) ]]; then
|
||||
echo "Stashing uncommitted changes before proceeding..."
|
||||
STASH_NEEDED=true
|
||||
git stash push -m "Temporary stash by find_largest_commit.sh script"
|
||||
else
|
||||
STASH_NEEDED=false
|
||||
fi
|
||||
|
||||
# Temporary file to store results
|
||||
TEMP_FILE=$(mktemp)
|
||||
|
||||
echo "Commit Hash,Author,Date,Size Before (bytes),Size After (bytes),Change (bytes),Change (%),Message" > "$TEMP_FILE"
|
||||
|
||||
# Counter for progress display
|
||||
COUNTER=0
|
||||
TOTAL_COMMITS=$(echo "$COMMITS" | wc -l)
|
||||
|
||||
# Process each commit
|
||||
PREV_SIZE=""
|
||||
for COMMIT in $COMMITS; do
|
||||
COUNTER=$((COUNTER + 1))
|
||||
echo -ne "Processing commit $COUNTER/$TOTAL_COMMITS...\r"
|
||||
|
||||
# Get commit details
|
||||
AUTHOR=$(git show -s --format="%an" "$COMMIT")
|
||||
DATE=$(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$COMMIT")
|
||||
MESSAGE=$(git show -s --format="%s" "$COMMIT" | sed 's/,/;/g') # Replace commas with semicolons
|
||||
|
||||
# Get size after this commit
|
||||
SIZE_AFTER=$(get_repo_size "$COMMIT")
|
||||
|
||||
# If this is the first commit we're checking, we don't have a previous size
|
||||
if [ -z "$PREV_SIZE" ]; then
|
||||
PREV_SIZE="$SIZE_AFTER"
|
||||
continue
|
||||
fi
|
||||
|
||||
# Calculate size before (which is the size after the previous commit)
|
||||
SIZE_BEFORE="$PREV_SIZE"
|
||||
PREV_SIZE="$SIZE_AFTER"
|
||||
|
||||
# Calculate change
|
||||
CHANGE=$((SIZE_AFTER - SIZE_BEFORE))
|
||||
ABS_CHANGE=${CHANGE#-} # Absolute value
|
||||
|
||||
# Calculate percentage change
|
||||
if [ "$SIZE_BEFORE" -ne 0 ]; then
|
||||
PERCENT_CHANGE=$(echo "scale=2; 100 * $CHANGE / $SIZE_BEFORE" | bc)
|
||||
else
|
||||
PERCENT_CHANGE="N/A"
|
||||
fi
|
||||
|
||||
# Record the data
|
||||
echo "$COMMIT,$AUTHOR,$DATE,$SIZE_BEFORE,$SIZE_AFTER,$CHANGE,$PERCENT_CHANGE%,$MESSAGE" >> "$TEMP_FILE"
|
||||
|
||||
# Check if this is the largest change so far
|
||||
if [ "$ABS_CHANGE" -gt "$LARGEST_CHANGE" ]; then
|
||||
LARGEST_CHANGE="$ABS_CHANGE"
|
||||
LARGEST_COMMIT="$COMMIT"
|
||||
LARGEST_SIZE_BEFORE="$SIZE_BEFORE"
|
||||
LARGEST_SIZE_AFTER="$SIZE_AFTER"
|
||||
fi
|
||||
done
|
||||
|
||||
# Return to the original branch
|
||||
# (Cleanup function will handle restoring stashed changes)
|
||||
git checkout -q "$CURRENT_BRANCH"
|
||||
|
||||
echo -e "\nAnalysis complete!"
|
||||
|
||||
# Function to format size in human-readable format
|
||||
format_size() {
|
||||
local size=$1
|
||||
if [ "$size" -ge 1073741824 ]; then
|
||||
echo "$(echo "scale=2; $size / 1073741824" | bc) GB"
|
||||
elif [ "$size" -ge 1048576 ]; then
|
||||
echo "$(echo "scale=2; $size / 1048576" | bc) MB"
|
||||
elif [ "$size" -ge 1024 ]; then
|
||||
echo "$(echo "scale=2; $size / 1024" | bc) KB"
|
||||
else
|
||||
echo "$size bytes"
|
||||
fi
|
||||
}
|
||||
|
||||
# Display the result
|
||||
if [ -n "$LARGEST_COMMIT" ]; then
|
||||
echo
|
||||
echo "Commit with the largest size change:"
|
||||
echo "-----------------------------------"
|
||||
echo "Commit: $LARGEST_COMMIT"
|
||||
echo "Author: $(git show -s --format="%an" "$LARGEST_COMMIT")"
|
||||
echo "Date: $(git show -s --format="%cd" --date=format:"%Y-%m-%d %H:%M:%S" "$LARGEST_COMMIT")"
|
||||
echo "Message: $(git show -s --format="%s" "$LARGEST_COMMIT")"
|
||||
echo
|
||||
echo "Size before: $(format_size "$LARGEST_SIZE_BEFORE")"
|
||||
echo "Size after: $(format_size "$LARGEST_SIZE_AFTER")"
|
||||
|
||||
CHANGE=$((LARGEST_SIZE_AFTER - LARGEST_SIZE_BEFORE))
|
||||
if [ "$CHANGE" -ge 0 ]; then
|
||||
echo "Change: +$(format_size "${CHANGE#-}") (increased)"
|
||||
else
|
||||
echo "Change: -$(format_size "${CHANGE#-}") (decreased)"
|
||||
fi
|
||||
|
||||
if [ "$LARGEST_SIZE_BEFORE" -ne 0 ]; then
|
||||
PERCENT_CHANGE=$(echo "scale=2; 100 * $CHANGE / $LARGEST_SIZE_BEFORE" | bc)
|
||||
echo "Percentage change: $PERCENT_CHANGE%"
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "Files changed in this commit:"
|
||||
|
||||
# Get the list of changed files
|
||||
CHANGED_FILES=$(git show --stat "$LARGEST_COMMIT" | grep '|' | sort -rn -k3)
|
||||
TOTAL_FILES=$(echo "$CHANGED_FILES" | wc -l)
|
||||
|
||||
# If there are too many files, show only the top 10 with the most changes
|
||||
if [ "$TOTAL_FILES" -gt 10 ]; then
|
||||
echo "$CHANGED_FILES" | head -n 10
|
||||
echo "... and $(($TOTAL_FILES - 10)) more files (total: $TOTAL_FILES files changed)"
|
||||
else
|
||||
echo "$CHANGED_FILES"
|
||||
fi
|
||||
else
|
||||
echo "No commits analyzed."
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "Full results saved to: $TEMP_FILE"
|
||||
echo "You can import this CSV file into a spreadsheet for further analysis."
|
||||
|
||||
# Make the script executable
|
||||
chmod +x "$0"
|
Loading…
Add table
Add a link
Reference in a new issue