bitreich-memestats

metrics and graphs for the bitreich meme collection
git clone git://src.adamsgaard.dk/bitreich-memestats # fast
git clone https://src.adamsgaard.dk/bitreich-memestats.git # slow
Log | Files | Refs | LICENSE Back to index

extract-memecount.sh (2470B)


      1 #!/bin/sh
      2 # generate table of hashtag numbers over time
      3 
      4 if [ $# -lt 1 ];
      5 then
      6 	printf "usage: %s annnadir [memecount.log]\n" "$(basename "$0")" >&2
      7 	exit 1
      8 fi
      9 
     10 annnadir="${1}"
     11 memefile="modules/hashtags/hashtags.txt"
     12 emojidir="/br/gopher/emoji/"
     13 filterdir="/br/gopher/memecache/filter/"
     14 update_annna=yes
     15 outputfile="${2:-memecount.log}"
     16 revisionrange=""
     17 datefmt="%Y-%m-%d"
     18 
     19 countolderfiles() {
     20 	find "$1" -maxdepth 1 -type f \! \( -newerct "$2" -o -name '*.orig' \) | wc -l
     21 }
     22 
     23 if [ ! -e "${annnadir}/${memefile}" ]; then
     24 	printf 'error: could not open %s\n' "${annnadir}/${memefile}"
     25 	exit 1
     26 fi
     27 
     28 if [ -f "${outputfile}" ]; then
     29 	lastrevision="$(tail -n 1 "${outputfile}" | cut -f 2)"
     30 	revisionrange="${lastrevision}..HEAD"
     31 	if [ "$(date +"${datefmt}")" = "$(tail -n 1 "${outputfile}" | cut -f 1)" ]; then
     32 		exit
     33 	fi
     34 else
     35 	firstrevision="$(cd "${annnadir}" && git rev-list --max-parents=0 HEAD)"
     36 	revisionrange="${firstrevision}..HEAD"
     37 fi
     38 
     39 case "${outputfile}" in
     40 /*)
     41 	;;
     42 *)
     43 	outputfile="$(pwd)/${outputfile}"
     44 	;;
     45 esac
     46 
     47 if [ "$update_annna" = "yes" ]; then
     48 	(cd "${annnadir}" && git pull >/dev/null 2>&1)
     49 fi
     50 
     51 headcommit="$(git rev-parse HEAD)"
     52 i=0
     53 # derived from Hiltjo Posthuma's loc.sh
     54 (cd "${annnadir}" && git log --pretty='format:%H %cd %at' --date="format:${datefmt}" \
     55 	"${revisionrange}") | \
     56 	sort -k 2 | uniq -f 1 | \
     57 	while read -r commit date timestamp; do
     58 
     59 	# hashtags originally stored in annna-start-services...
     60 	n="$(cd "${annnadir}" && git show "$commit:annna-start-services" 2>/dev/null | \
     61 		grep -E '^	#[a-z0-9]' | wc -l | awk '{print $1}')"
     62 
     63 	# ...but are now stored in $memefile
     64 	if [ "$n" -le 1 ]; then
     65 		n="$(cd "${annnadir}" && git show "$commit:$memefile" 2>/dev/null | \
     66 		     wc -l | awk '{print $1}')"
     67 	fi
     68 
     69 	if [ "$n" -gt 1 ]; then
     70 		
     71 		# only one row per day unless the most recent commit is newer
     72 		if [ "$date" != "$lastprintdate" ] || [ "$commit" = "$headcommit" ]; then
     73 
     74 			if [ "$i" -eq 1 ]; then
     75 				if [ "$commit" != "$headcommit" ]; then
     76 					dn_dt="$(awk \
     77 						-v n0="$n0" -v t0="$t0" -v n="$n" -v t="$timestamp" \
     78 						'BEGIN{print (n-n0)/(t-t0)*3600.0*24.0}')"
     79 				fi
     80 			else
     81 				dn_dt=0
     82 				i=1
     83 			fi
     84 			n0="$n"
     85 			t0="$timestamp"
     86 			lastprintdate="$date"
     87 			n_emoji="$(countolderfiles "$emojidir" "$date")"
     88 			n_filter="$(countolderfiles "$filterdir" "$date")"
     89 
     90 			printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
     91 				"$date" "$commit" "$n" "$timestamp" "$dn_dt" \
     92 				"$n_emoji" "$n_filter" \
     93 				>> "${outputfile}"
     94 
     95 		fi
     96 	fi
     97 done
     98