it-artikel:linux:shellscript-automatically-fix-wrong-filename-extensions-fix-filename-extension.sh

shellscript - automatically fix wrong filename extensions - fix-filename-extension.sh

It's a slightly odd topic. But when you have to deal a lot with “non-IT people” who throw around lots of files such as photos and/or archives, and who mix up file formats and filename extensions, things can quickly get messy. This shell script is an early attempt to quickly and easily find these “messed up” (wrongly renamed) files on a data volume and to have the filename extension “fixed” where possible. This is especially important in mixed-platform environments with Windows, Mac and Linux/Unix, where some apps act on the filename extension and others on the actual file content.

The shell script is nowhere near complete or comprehensive yet. But it's a possible starting point and building block, and it might be of help to someone else too.

:!: Since this script actually modifies files in a filesystem recursively, actual use might be somewhat dangerous/harmful. So be warned and use at your own risk!

fix-filename-extension.sh
#!/bin/bash
#
# fix filename extension script Version 2018-12-08
# in current directory (recursively) 
#
# by Axel Werner    [axel.werner.1973@gmail.com]
#                   [mail@awerner.myhome-server.de]
#
# CHANGE HISTORY:
# =========================
#
# 2018-12-08    A.Werner    Release V1.0
#
#
#
#TODO:
#   - xxxx
#
 
 
# Abort the script as soon as a command fails outside of a condition.
set -e
# Uncomment the next line to get an execution trace while debugging.
#set -x


# ANSI color sequences for console output.
# The values are stored as literal backslash text; they only turn into real
# escape sequences when they are printed through `echo -e`.

NOCOLOR="\033[0m"       # reset all attributes
NC="${NOCOLOR}"         # short alias for NOCOLOR
RED="\033[91m"          # bright red
YELLOW="\033[93m"       # bright yellow
GREEN="\033[92m"        # bright green
BLUE="\033[96m"         # named BLUE, but 96 is the ANSI code for bright cyan
 
 
 
 
# print ARG...
# Write all arguments to stdout as one space-separated line. Backslash
# escapes such as \t and \n inside the arguments are expanded by `echo -e`.
print() {
	echo -e "$@"
}
 
# printInfo ARG...
# Write an informational message to stdout, framed by the $NC reset sequence.
# Arguments are joined with single spaces; backslash escapes are expanded.
printInfo() {
	local IFS=' '   # make "$*" join the arguments with a space
	echo -e "${NC}$*${NC}"
}
 
# printWarning ARG...
# Write a warning to stdout, colored with $YELLOW and terminated with $NC.
# Arguments are joined with single spaces; backslash escapes are expanded.
printWarning() {
	local IFS=' '   # make "$*" join the arguments with a space
	echo -e "${YELLOW}$*${NC}"
}
 
# printError ARG...
# Write an error message, colored with $RED and terminated with $NC.
# Arguments are joined with single spaces; backslash escapes are expanded.
# The message goes to stderr so that errors stay visible (and separable)
# when the regular output of the script is redirected or piped.
printError() {
	local IFS=' '   # make "$*" join the arguments with a space
	echo -e "${RED}$*${NC}" >&2
}
 
# printDebug ARG...
# Write a debug message to stdout, prefixed with "DEBUG: ", colored with
# $BLUE and terminated with $NC. Arguments are joined with single spaces;
# backslash escapes are expanded.
printDebug() {
	local IFS=' '   # make "$*" join the arguments with a space
	echo -e "${BLUE}DEBUG: $*${NC}"
}
 
# reportCaseDetected
# Emit a debug line naming the case that matched the current file.
# Globals: newFileExtension (read) - label/extension of the matched case.
reportCaseDetected() {
    local detectedCase="$newFileExtension"
    printDebug "\tcase '${detectedCase}' detected ..."
}
 
# toUpper ARG...
# Print the arguments (joined with single spaces) with the ASCII letters
# a-z converted to upper case.
#
# The tr sets are quoted on purpose: unquoted [a-z] / [A-Z] are shell glob
# patterns and get replaced by matching single-letter file names from the
# current directory, which silently corrupts the conversion.
# printf is used instead of echo so that arguments such as "-n" or "-e" are
# treated as data and not as echo options.
toUpper() {
    local IFS=' '   # make "$*" join the arguments with a space
    printf '%s\n' "$*" | tr 'a-z' 'A-Z'
}
 
# toLower ARG...
# Print the arguments (joined with single spaces) with the ASCII letters
# A-Z converted to lower case.
#
# The tr sets are quoted on purpose: unquoted [A-Z] / [a-z] are shell glob
# patterns and get replaced by matching single-letter file names from the
# current directory, which silently corrupts the conversion.
# printf is used instead of echo so that arguments such as "-n" or "-e" are
# treated as data and not as echo options.
toLower() {
    local IFS=' '   # make "$*" join the arguments with a space
    printf '%s\n' "$*" | tr 'A-Z' 'a-z'
}
 
# noSpaces ARG...
# Print the arguments with every whitespace character removed (blanks, tabs,
# newlines, ...). Nothing is printed after the text, not even a newline.
#
# printf is used instead of echo so that arguments such as "-n" or "-e" are
# treated as data and not as echo options.
noSpaces() {
    local IFS=' '   # make "$*" join the arguments with a space
    printf '%s' "$*" | tr -d '[:space:]'
}
 
# testFileExtention
# Compare the current file's extension with the extension expected for its
# detected content type. The comparison ignores letter case.
#
# Globals read:
#   fileExtensionIs        - extension currently carried by the file
#   newFileExtension       - extension expected for the detected type
#   fileNameWithPath       - path of the file (used in the warning only)
#   fileTypeDescCompressed - detected type (used in the warning only)
# Globals written:
#   fileExtensionIsUppercase, newFileExtensionUpper - upper-cased copies
# Returns:
#   0 if the extension matches, 1 if it does not (a warning is printed).
#   The original `return -1` is outside the valid 0-255 range: depending on
#   the bash version it yields 255 or an "invalid option" error.
testFileExtention() {
    fileExtensionIsUppercase=$( toUpper "$fileExtensionIs" )
    newFileExtensionUpper=$( toUpper "$newFileExtension" )

    if [ "$fileExtensionIsUppercase" != "$newFileExtensionUpper" ] ; then
        printWarning "\tFile extension missmatch detected. File '$fileNameWithPath' is of type '$fileTypeDescCompressed'. Expected filename extension is: '$newFileExtension' ."
        return 1
    fi

    printDebug "\tFile extension matches content."
    return 0
}
 
# renameToExpectedExtension
# Rename the current file so that it carries the expected extension.
# An already existing target is never overwritten; the file is skipped with a
# warning instead.
# Globals read: fileNameWithPath, fileBaseNameNoExt, newFileExtension
renameToExpectedExtension() {
    local target="${fileBaseNameNoExt}.${newFileExtension}"

    if [ -e "$target" ] || [ -L "$target" ] ; then
        printWarning "\tCannot rename '$fileNameWithPath': target '$target' already exists. Skipping."
        return 0
    fi
    mv -v -- "$fileNameWithPath" "$target"
}

# Main loop: walk all regular files below the current directory, detect the
# content type with file(1) and fix the filename extension where the case
# arm for that type asks for it.
#
# The file names are read NUL-delimited with `IFS= read -r` so that names
# containing blanks, newlines or backslashes are passed on unchanged.
while IFS= read -r -d '' fileNameWithPath ; do
    print "\nProcessing '$fileNameWithPath' ..."

    # `file -b` prints the description without the file name, so a ':' inside
    # the file name cannot confuse the parsing. The description is then cut
    # at its first ':' because several patterns below (e.g. *AVI*VIDEO)
    # expect the text to end there.
    fileTypeDescription=$( file -b -- "$fileNameWithPath" )
    fileTypeDescription="${fileTypeDescription%%:*}"
    printDebug "\tFile Type is\t\t: '$fileTypeDescription'"

    # Derive the extension from the file name only, never from the directory
    # part: for './photo' or './some.dir/photo' a split on the whole path
    # would produce a bogus extension and a rename target outside of the
    # file's own directory. A leading dot (hidden file) is not an extension.
    fileNameOnly="${fileNameWithPath##*/}"
    case "$fileNameOnly" in
        ( ?*.* )
            fileExtensionIs="${fileNameOnly##*.}"
            fileBaseNameNoExt="${fileNameWithPath%.*}"
            ;;
        ( * )
            fileExtensionIs=""
            fileBaseNameNoExt="$fileNameWithPath"
            ;;
    esac
    printDebug "\tFile Extension is\t: '$fileExtensionIs'"
    printDebug "\tFilename without Ext is\t: '$fileBaseNameNoExt'"

    # Normalized type: no whitespace, upper case. This is what the patterns
    # below are matched against.
    fileTypeDescCompressed=$( toUpper "$( noSpaces "${fileTypeDescription}" )" )

    case "$fileTypeDescCompressed" in
        ( PNGIMAGEDATA* )
            newFileExtension=png
            reportCaseDetected
            if ! testFileExtention ; then
                renameToExpectedExtension
            fi
            ;;
        ( JPEGIMAGEDATA* )
            newFileExtension=jpg
            reportCaseDetected
            if ! testFileExtention ; then
                # 'jpeg' is an accepted alternative spelling of the extension.
                if [ "$( toUpper "$fileExtensionIs" )" = "JPEG" ] ; then
                    print "\t\tAlternative filename extention 'jpeg' accepted. Moving on..."
                else
                    renameToExpectedExtension
                fi
            fi
            ;;
        ( *AVI*VIDEO )
            newFileExtension=avi
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( ISOMEDIA*MP4V2* | ISOMEDIA*M4V*VIDEO | ISOMEDIA*MP4* )
            newFileExtension=mp4
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( *ISOMEDIA*3GPP* )
            newFileExtension=3gp
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( *RARARCHIVEDATA* | 7-ZIPARCHIVEDATA* )
            newFileExtension='-ARCHIVE-'
            reportCaseDetected
            # we don't tamper with archive files just yet.
            ;;
        ( *MATROSKADATA* )
            newFileExtension=mkv
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( SENDMAIL* | DATA )
            newFileExtension='-miscDataFile-'
            reportCaseDetected
            # we don't tamper with misc data files just yet.
            ;;
        ( MICROSOFTASF )
            newFileExtension=wmv
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( MPEGSEQUENCE,V1* )
            newFileExtension=mpg
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( MACROMEDIAFLASHVIDEO )
            newFileExtension=flv
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( DOSEXECUTABLE* )
            newFileExtension='-DosExecutable-'
            reportCaseDetected
            # we don't tamper with these files just yet.
            ;;
        ( WEBM )
            newFileExtension=webm
            reportCaseDetected
            # we don't tamper with video files just yet.
            ;;
        ( BOURNE-AGAINSHELLSCRIPT* )
            newFileExtension='-BashScript-'
            reportCaseDetected
            # we don't tamper with shell script files just yet.
            ;;
        ( ASCIITEXT )
            newFileExtension='-ASCII-'
            reportCaseDetected
            # we don't tamper with ASCII files just yet.
            ;;
        ( *STRING* )
            # Template arm: copy it and replace STRING / EXTENSION to add
            # support for another file type. The original listed this arm
            # five times; only the first one could ever match.
            newFileExtension=EXTENSION
            reportCaseDetected
            ;;
        ( * )
            printError "ERROR: We dont know how to handle file type '$fileTypeDescCompressed' yet. Please extend this shell script manually so it gets better."
            # exit status must be within 0-255; the original -1 wrapped to 255.
            exit 1
            ;;
    esac
done < <( find . -type f -print0 )
 
 

Axel Werner 2018-12-08 17:17

it-artikel/linux/shellscript-automatically-fix-wrong-filename-extensions-fix-filename-extension.sh.txt · Last modified: 2022-08-31 12:30 by 127.0.0.1