#!/bin/sh

# this script uses metaflac and rsgain to set the following tags for flac files:
#   ALBUM
#   ARTIST
#   ALBUMARTIST
#   DATE
#   GENRE
#   TITLE
#   TRACKNUMBER
#   DISCNUMBER
#   CONTENTGROUP
#   ORGANIZATION
#   REPLAYGAIN_TRACK_GAIN
#   REPLAYGAIN_TRACK_PEAK
#   REPLAYGAIN_ALBUM_GAIN
#   REPLAYGAIN_ALBUM_PEAK

# the -x arg can be used to remove all tags that are not listed above. this is kept
# as a separate option outside the main loop as you may want to do manual edits
# before removing them and additional tags could have info you need in them.

# the db.csv file is required to automate as much of the tagging as possible.
# generate it using list.sh before using this script.

# NOTE(review): this part of the file was recovered from a copy in which line breaks
# had been collapsed and every span between a '<' and the following '>' had been
# deleted. every span marked "NOTE(review): reconstructed" below was rewritten from
# the surrounding prompts and comments only; compare it with the upstream script
# before relying on it.

# globals shared by the functions below (all set by the caller unless noted):
#   file   - flac file used as the tag reference for the current album
#   dir    - directory of the current album
#   mod    - non-empty when files may be modified, empty for a dry run
#   db     - path of the tab separated database (unit, artist, organization, group)
#   log    - path of the log file
#   com    - metaflac arguments collected for the album; the caller runs them with eval
#   unit   - set to "U" by getartist() when the album artist is a sub-unit

# print the value of tag $1 of "$file" without the leading "NAME=".
# everything after the first '=' is kept, so values that contain '=' are not cut short.
gettag() {
  metaflac --show-tag="$1" "$file" | cut -d '=' -f 2-
}

# succeed if $1 contains at least one byte outside the 7-bit ascii range.
# implemented with tr so it does not depend on GNU grep -P.
has_nonascii() {
  [ -n "$(printf '%s' "$1" | LC_ALL=C tr -d '\000-\177')" ]
}

# print $1 wrapped in single quotes with embedded single quotes escaped, so that it
# survives the eval of $com as exactly one literal word whatever characters it holds.
shquote() {
  printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# print column $2 of the first database row whose artist (column 2) equals $1.
# the comparison ignores case and is literal, so names such as "IZ*ONE" or "B.A.P"
# are not interpreted as regular expressions.
dbfield() {
  [ -n "$1" ] || return 0
  DBKEY="$1" awk -F '\t' -v col="$2" \
    'tolower($2) == tolower(ENVIRON["DBKEY"]) { print $col; exit }' "$db"
}

# succeed if the database has a row flagged "U" in column 1 whose artist equals $1.
issubunit() {
  [ -n "$1" ] || return 1
  DBKEY="$1" awk -F '\t' \
    '$1 ~ /^U/ && tolower($2) == tolower(ENVIRON["DBKEY"]) { found = 1; exit }
     END { exit !found }' "$db"
}

# check variables and programs. exits with status 1 when a requirement is missing.
check() {
  if ! command -v metaflac >/dev/null 2>&1 || ! command -v rsgain >/dev/null 2>&1; then
    echo "metaflac and rsgain must be installed to use this script." >&2
    exit 1
  fi
  if [ ! -f "$db" ]; then
    echo "could not find db.csv database. generate it using ./list.sh -d -t" >&2
    exit 1
  fi
  if [ ! -d "$master" ]; then
    echo "invalid directory!" >&2
    exit 1
  fi
  # make sure the log file can be created later.
  [ -d "$(dirname "$log")" ] || mkdir -p "$(dirname "$log")"
}

# get release date in YYYYMMDD format.
# reads the DATE (and YEAR) tags of "$file" and the name of "$dir"; on success it
# appends a DATE replacement to $com.
getdate() {
  # year is global; clear it so a value found for a previous album cannot leak in.
  year=""

  # get existing date tag and its length to determine whether it is formatted correctly.
  date="$(gettag DATE)"
  len=${#date}

  # if len is 8, the date tag *should* already be set correctly and this step can be skipped.
  # there's a chance that the date could be in DDMMYYYY format but it's not common and it's
  # easy to spot once the rename script has been run and dates are present in the directory names.
  [ "$len" -eq 8 ] && return

  # if len is 10, the date is set correctly but contains '-', '.', '/' or ' ' between the
  # numbers, so remove them. alternatively, if date has ' / ' in it, it contains 2 sets of
  # dates, so remove the second date.
  if [ "$len" -eq 10 ] || printf '%s\n' "$date" | grep -qE ' / [0-9]{4}'; then
    date="$(printf '%s\n' "$date" | sed 's/ \/ .*//;s/-//g;s/\.//g;s/\///g;s/ //g')"
  else
    # if date is not set correctly, try to pull full release date from directory name.
    dirdate="$(basename "$dir" \
      | grep -oE '[0-9]{4}(-|\.| )?[0-9]{2}(-|\.| )?[0-9]{2}' \
      | sed 's/-//g;s/\.//g;s/ //g')"
    if [ -n "$dirdate" ]; then
      date="$dirdate"
    else
      # the release date wasn't in the directory name, so try to get only the release year.
      if [ "$len" -eq 4 ]; then
        # the year was already set in the date tag.
        year="$date"
      elif [ -n "$(metaflac --show-tag=YEAR "$file")" ]; then
        # only use the year tag if it is 4 characters long. sometimes the full date can be
        # in this tag, but it's a rare case not really worth accounting for.
        y="$(gettag YEAR)"
        [ "${#y}" -eq 4 ] && year="$y"
      else
        # try to pull year from directory name.
        year="$(basename "$dir" | grep -oE '[0-9]{4}(\.|\)|\]|-)' | cut -c 1-4)"
      fi

      # NOTE(review): reconstructed from here to the end of this else branch. the source
      # only shows the MMDD prompt and the final "date -> ..." report.
      if [ -n "$year" ]; then
        if [ -n "$mod" ]; then
          printf "enter release month and day in MMDD format: "
          # the caller feeds the directory list on stdin, so answers come from the terminal.
          read -r mmdd </dev/tty
          date="${year}${mmdd}"
        else
          date="$year"
        fi
      else
        echo "date -> n/a"
        return
      fi
    fi
  fi

  echo "date -> $date"
  com="${com} --remove-tag=DATE --set-tag=DATE=$(shquote "$date")"
}

# get disc number.
# normalises the DISCNUMBER tag of "$file" (e.g. "02/03" -> "2", missing -> "1") and
# appends the replacement to $com. a single character tag is left untouched.
getdisc() {
  disc="$(gettag DISCNUMBER)"

  if [ -n "$disc" ]; then
    # if disc contains only a single character, it is taken to be set correctly already.
    [ "${#disc}" -eq 1 ] && return
    # drop a "/total" or "-total" suffix and one leading zero.
    disc="$(printf '%s\n' "$disc" | sed 's/\/.*//;s/-.*//;s/^0//')"
  else
    # if disc tag doesn't exist, default to one disc.
    disc=1
  fi

  echo "disc number -> $disc"
  com="${com} --remove-tag=DISCNUMBER --set-tag=DISCNUMBER=$(shquote "$disc")"
}

# get genre.
# queues a GENRE=K-Pop replacement unless the first GENRE tag of "$file" already is K-Pop.
getgenre() {
  # some releases may have extra genres but as long as K-Pop is the first one listed
  # and each additional genre has its own individual tag, we can leave them as is.
  if ! metaflac --show-tag=GENRE "$file" | head -n 1 | grep -q '=K-Pop$'; then
    echo "genre -> K-Pop"
    com="${com} --remove-tag=GENRE --set-tag=GENRE=K-Pop"
  fi
}

# get artist and album artist.
# $1 is the display name of the tag to process: "artist" or "album artist".
# unlike the other get* functions the change is written to every flac in "$dir"
# immediately (when $mod is set), because getgroup() and getorg() read the result.
getartist() {
  unit=""
  text="$1"
  tag="$(echo "$1" | tr -d ' ' | tr '[:lower:]' '[:upper:]')"

  # get existing artist tag.
  artist="$(gettag "$tag")"

  # various artist albums will not have their artist tags overwritten. this only applies
  # to the first call of getartist() because the album artist tag could still contain
  # () / [] / non-ascii chars.
  if [ "$tag" = "ARTIST" ]; then
    if metaflac --show-tag=ALBUMARTIST "$file" | grep -qi various; then
      echo "various artist album. skipping setting artist tags..."
      return
    fi
  fi

  # if album artist tag doesn't exist, get it from the artist tag.
  if [ "$tag" = "ALBUMARTIST" ] && [ -z "$artist" ]; then
    artist="$(gettag ARTIST)"
    # in a dry run the artist tag has not been rewritten yet, so this won't be 100%
    # accurate, but that's fine for a dry run.
    [ -z "$mod" ] && artist="$(printf '%s\n' "$artist" | sed 's/ (.*)$//;s/ \[.*\]$//')"
  fi

  # if tag ends in () or [], implying transliterations, remove them.
  artist="$(printf '%s\n' "$artist" | sed 's/ (.*)$//;s/ \[.*\]$//')"

  # if tag contains non-ascii chars, prompt user to overwrite it manually.
  if has_nonascii "$artist"; then
    if [ -n "$mod" ]; then
      printf "%s contains non-ascii chars. enter tag manually or leave blank to skip: " "$text"
      # NOTE(review): reconstructed. a blank answer keeps the current value.
      read -r ans </dev/tty
      [ -n "$ans" ] && artist="$ans"
    else
      echo "$text -> contains non-ascii chars"
    fi
  fi

  # check if artist is a sub-unit.
  if [ "$tag" = "ALBUMARTIST" ] && issubunit "$artist"; then
    a="$(gettag ARTIST)"
    aa="$(gettag ALBUMARTIST)"
    # if artist and album artist tags match, prompt for manual entry.
    if [ "$a" = "$aa" ]; then
      if [ -n "$mod" ]; then
        printf "%s is a sub-unit with matching %s tag.\n" "$a" "$tag"
        printf "enter %s manually or leave blank to skip: " "$tag"
        # NOTE(review): reconstructed. a blank answer keeps the current value.
        read -r ans </dev/tty
        [ -n "$ans" ] && artist="$ans"
      else
        echo "$text -> sub-unit with matching ARTIST and ALBUMARTIST tags."
      fi
      # save unit variable for the database update.
      unit="U"
    fi
  fi

  # compare artist variable with existing tag to detect any changes that need to be set.
  if [ "$artist" != "$(gettag "$tag")" ]; then
    echo "$text -> $artist"
    # rather than adding to $com, artist has to be set immediately for getgroup() and
    # getorg() to work properly.
    [ -n "$mod" ] && metaflac --remove-tag="$tag" --set-tag="$tag=$artist" "$dir"/*.flac
  fi
}

# get content group.
# only acts when "$file" has no CONTENTGROUP tag: looks the artist up in the database and,
# failing that, asks the user (modify mode) or reports n/a (dry run).
getgroup() {
  group=""

  # check if tag exists already.
  if [ -z "$(metaflac --show-tag=CONTENTGROUP "$file")" ]; then
    artist="$(gettag ARTIST)"
    # this won't be 100% accurate in a dry run, but that's fine.
    [ -z "$mod" ] && artist="$(printf '%s\n' "$artist" | sed 's/ (.*)$//;s/ \[.*\]$//')"

    # try and get group from the database row of this artist.
    group="$(dbfield "$artist" 4)"

    # if artist and group aren't in the database, prompt for manual entry.
    if [ -z "$group" ] && [ -n "$mod" ]; then
      # somewhat obfuscated selection, but single characters make it quick.
      printf "Group: g/b/x\nSolo: f/m\nDuo: fd/md/xd\n"
      printf "enter content group for %s: " "$artist"
      read -r ans </dev/tty
      # NOTE(review): reconstructed. the mapping from shorthand to tag value was lost;
      # the values below are placeholders inferred from the prompt and MUST be checked
      # against the values already used in db.csv.
      case "$ans" in
        g) group="Girl Group" ;;
        b) group="Boy Group" ;;
        x) group="Co-ed Group" ;;
        f) group="Female Solo" ;;
        m) group="Male Solo" ;;
        fd) group="Female Duo" ;;
        md) group="Male Duo" ;;
        xd) group="Co-ed Duo" ;;
      esac
      if [ -z "$group" ]; then
        # NOTE(review): reconstructed log message.
        echo "content group not set for $(basename "$dir")" >> "$log"
        return
      fi
    elif [ -z "$group" ]; then
      echo "content group -> n/a"
      return
    fi

    echo "content group -> $group"
    com="${com} --set-tag=CONTENTGROUP=$(shquote "$group")"
  fi
}

# get organization / producer.
# acts when "$file" has no ORGANIZATION tag or one with non-ascii chars. tries the LABEL
# tag first, then the database, then asks the user (modify mode) or reports n/a (dry run).
getorg() {
  org=""
  current="$(gettag ORGANIZATION)"

  # check if tag exists already and doesn't contain non-ascii chars.
  if [ -z "$current" ] || has_nonascii "$current"; then
    # try using the label tag since label and organization are generally the same thing.
    label="$(gettag LABEL)"
    if [ -n "$label" ] && ! has_nonascii "$label"; then
      org="$label"
    else
      # label tag doesn't exist or contains non-ascii chars: try the database.
      artist="$(gettag ARTIST)"
      # this won't be 100% accurate in a dry run, but that's fine.
      [ -z "$mod" ] && artist="$(printf '%s\n' "$artist" | sed 's/ (.*)$//;s/ \[.*\]$//')"
      org="$(dbfield "$artist" 3)"
    fi

    # if we couldn't get org from the label tag or database, prompt for manual entry.
    if [ -z "$org" ] && [ -n "$mod" ]; then
      printf "enter organization for %s: " "$artist"
      # NOTE(review): reconstructed. the answer is used verbatim; blank skips the tag.
      read -r ans </dev/tty
      org="$ans"
      if [ -z "$org" ]; then
        # NOTE(review): reconstructed log message.
        echo "organization not set for $(basename "$dir")" >> "$log"
        return
      fi
    elif [ -z "$org" ]; then
      echo "organization -> n/a"
      return
    fi

    echo "organization -> $org"
    # remove first so that an existing non-ascii value is replaced rather than duplicated.
    com="${com} --remove-tag=ORGANIZATION --set-tag=ORGANIZATION=$(shquote "$org")"
  fi
}

# apply replaygain tags using rsgain.
replaygain() {
  # asks for confirmation, then scans every album below "$master".
  # NOTE(review): reconstructed. the body of this function was lost from the recovered
  # copy of this file (everything between a '<' and the following '>' had been deleted);
  # only the prompt survived. "rsgain easy" is assumed to be the original call.
  printf "\napply replay gain to all albums in %s? [Y/n] " "$master"
  read -r ans </dev/tty
  case "$ans" in
    "" | [Yy]*) rsgain easy "$master" ;;
  esac
}

# remove every tag that is not in the list at the top of this script from all albums
# below "$master".
# NOTE(review): reconstructed. only the metaflac call and the counter increment survived
# in the recovered copy; the prompt, its default and the directory walk were rewritten
# to mirror the main loop. confirm against the upstream script before using -x.
remove() {
  printf "\nremove additional tags from all albums in %s? [y/N] " "$master"
  read -r ans </dev/tty
  case "$ans" in
    [Yy]*)
      dirs="$(find "$master" -type d -links 2)"
      total="$(printf '%s\n' "$dirs" | wc -l)"
      num=1
      printf '%s\n' "$dirs" | while IFS= read -r dir; do
        if ls "$dir"/*.flac >/dev/null 2>&1; then
          printf "[%d/%d] %s\n" "$num" "$total" "$(basename "$dir")"
          # any other tags present will be removed with this command.
          metaflac --remove-all-tags-except=ALBUM=ARTIST=TITLE=CONTENTGROUP=GENRE=ALBUMARTIST=DISCNUMBER=REPLAYGAIN_TRACK_GAIN=REPLAYGAIN_TRACK_PEAK=REPLAYGAIN_ALBUM_GAIN=REPLAYGAIN_ALBUM_PEAK=DATE=ORGANIZATION=TRACKNUMBER "$dir"/*.flac
          num=$((num+1))
        fi
      done
      ;;
  esac
}

# update database with new entry.
# reads ARTIST, ORGANIZATION and CONTENTGROUP from "$file" and, if the artist is not in
# "$db" yet, offers to append "unit<TAB>artist<TAB>organization<TAB>group".
# uses the global $unit, which is set elsewhere in this script.
updatedb() {
  # everything after the first '=' is kept so values that contain '=' are not cut short.
  artist="$(metaflac --show-tag=ARTIST "$file" | cut -d '=' -f 2-)"
  dborg="$(metaflac --show-tag=ORGANIZATION "$file" | cut -d '=' -f 2-)"
  dbgroup="$(metaflac --show-tag=CONTENTGROUP "$file" | cut -d '=' -f 2-)"

  # various artist releases are ignored.
  if metaflac --show-tag=ALBUMARTIST "$file" | grep -qi various; then
    return
  fi

  # check if artist exists in database. this check must be done on the artist alone because
  # artists can have different organization tags from release to release. the comparison is
  # literal and ignores case, so names like "IZ*ONE" are not treated as regular expressions.
  if DBKEY="$artist" awk -F '\t' \
    'tolower($2) == tolower(ENVIRON["DBKEY"]) { found = 1; exit } END { exit !found }' "$db"; then
    return
  fi

  # only offer to add the artist if we have the necessary metadata.
  if [ -z "$artist" ] || [ -z "$dborg" ] || [ -z "$dbgroup" ]; then
    return
  fi
  # a tag that occurs more than once yields several lines and cannot form one database row.
  case "${artist}${dborg}${dbgroup}" in
    *"
"*) return ;;
  esac

  entry="$(printf '%s\t%s\t%s' "$artist" "$dborg" "$dbgroup")"
  printf "add \"%s\t%s\" to the database? [Y/n] " "$unit" "$entry"
  # NOTE(review): reconstructed. the confirmation handling was lost from the recovered
  # copy; only the prompt and the append to "$db" survived.
  read -r ans </dev/tty
  case "$ans" in
    "" | [Yy]*) printf '%s\t%s\n' "$unit" "$entry" >> "$db" ;;
  esac
}

# print the usage text.
usage() {
  printf "usage: ./%s [OPTION]...\n\noptions:\n  -d  specify directory to scan\n  -m  modify files\n  -r  apply replaygain tags\n  -x  remove additional tags from files\n" "$(basename "$0")"
}

# set log and database files. this is done before option parsing because check() needs
# both paths, including when it is run for -r or -x.
log="../logs/autotag.log"
db="../lists/db.csv"

# -r and -x are only recorded here and run after parsing, so that they work regardless
# of where -d appears on the command line.
action=""
while getopts "d:mrxh" o; do
  case "${o}" in
    d) master="$OPTARG" ;;
    m) mod=1 ;;
    r) action="replaygain" ;;
    x) action="remove" ;;
    h | *) usage; exit 1 ;;
  esac
done

# check variables and programs.
check

case "$action" in
  replaygain) replaygain; exit ;;
  remove) remove; exit ;;
esac

# begin new log.
printf "\nscript run - %s\n" "$(date)" >> "$log"

[ -z "$mod" ] && printf "\033[1mperforming DRY RUN. use -m to modify files.\033[0m\n\n"

# find sub directories in master directory. "-links 2" selects directories without
# sub directories on file systems that count "." and ".." as links.
dirs="$(find "$master" -type d -links 2)"
total="$(printf '%s\n' "$dirs" | wc -l)"
num=1

# scan master directory for sub directories. the loop runs in a subshell with the directory
# list on stdin, which is why every prompt in this script reads from /dev/tty.
printf '%s\n' "$dirs" | while IFS= read -r dir; do
  # check if sub directories contain flac files.
  if ls "$dir"/*.flac >/dev/null 2>&1; then
    printf "\033[1m[%d/%d] %s\033[0m\n" "$num" "$total" "$(basename "$dir")"

    # reset command.
    com=""

    # prefer using a track without features; fall back to the first track found.
    file="$(find "$dir" -type f -name '*.flac' | grep -iv feat | head -n 1)"
    if [ -z "$file" ]; then
      file="$(find "$dir" -type f -name '*.flac' | head -n 1)"
    fi

    # get tags to build command.
    getdate
    getdisc
    getgenre
    getartist "artist"
    getartist "album artist"
    getgroup
    getorg

    if [ -n "$mod" ]; then
      # update all tags with a single call of metaflac. $dir and $log are escaped so they
      # are expanded by eval itself and cannot be re-parsed as shell syntax.
      [ -n "$com" ] && eval "metaflac ${com} \"\$dir\"/*.flac 2>> \"\$log\""
      # update database if possible.
      updatedb
    fi

    num=$((num+1))
  fi
done

# apply replaygain.
[ -n "$mod" ] && replaygain

printf "\n\033[1mscript completed! remember to check log for errors.\033[0m\n"