bitreich-memestats

statistical analysis of tags in Freenode/#bitreich-en
git clone git://src.adamsgaard.dk/bitreich-memestats
Log | Files | Refs Back to index

commit d08ff10b40479174b132e313726b406692972e35
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date:   Fri, 17 Apr 2020 09:55:02 +0200

Add working script extracting data from log

Diffstat:
A Makefile            |  9 +++++++++
A extract_memeuse.awk | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile
@@ -0,0 +1,9 @@
+log = ~/.irssi/log/Freenode/\#bitreich-en.log
+
+memeuse.tsv: extract_memeuse.awk $(log)
+	awk -f extract_memeuse.awk $(log) > $@
+
+clean:
+	rm -f memeuse.tsv
+
+.PHONY: memeuse.tsv
diff --git a/extract_memeuse.awk b/extract_memeuse.awk
@@ -0,0 +1,85 @@
+#!/usr/bin/awk -f
+# Extract timestamp, user, and tag from an irssi log in the default format.
+#
+# Output is tab-separated, one row per tagged message or action:
+#   day index, YYYY-MM-DD, HH:MM, nick, tag
+# The day index counts the "--- Day changed" lines seen so far.
+
+# Parse the date out of a "--- Day changed Fri Apr 17 2020" or
+# "--- Log opened Fri Apr 17 09:55:02 2020" line and store it in the
+# globals year, month and day (month and day zero-padded).  The globals
+# are left untouched if s cannot be parsed.  field, n and m are locals.
+function extract_date(s,    field, n, m) {
+	sub(/^--- (Day changed|Log opened) /, "", s);
+	n = split(s, field, " "); # weekday, month, day, [time,] year
+	if (n < 4 || length(field[2]) != 3)
+		return;
+
+	# the offset of the month name in this table gives the month number
+	m = index("JanFebMarAprMayJunJulAugSepOctNovDec", field[2]);
+	if (m == 0 || m % 3 != 1)
+		return;
+
+	month = sprintf("%02d", (m + 2) / 3);
+	day = sprintf("%02d", field[3]);
+	year = field[n];
+}
+
+# Return the nick that wrote line s without its mode prefix, or "" if s is
+# neither a regular message ("HH:MM <@nick> text") nor an action
+# ("HH:MM  * nick text").
+function extract_user(s) {
+	# action: the first "*" comes before any "<"
+	if (match(s, /^[^<*]* \* /)) {
+		s = substr(s, RSTART + RLENGTH);
+		sub(/ .*/, "", s);
+		return s;
+	}
+	# regular message: take the first <...> pair and drop "<", the
+	# one-character mode prefix (" ", "@" or "+") and ">"
+	if (match(s, /<[^>]+>/))
+		return substr(s, RSTART + 2, RLENGTH - 3);
+	return "";
+}
+
+# Return the first tag in s, or "" if there is none.  A tag is a "#" that
+# follows a space and is followed by a letter or digit; it runs up to the
+# next space.  A "#" inside a word, such as a URL fragment, is not a tag.
+function extract_tag(s) {
+	if (!match(s, / #[A-Za-z0-9][^ ]*/))
+		return "";
+	return substr(s, RSTART + 1, RLENGTH - 1);
+}
+
+# Return the first HH:MM timestamp in s, or "" if there is none.
+function extract_time(s) {
+	if (!match(s, /[0-2][0-9]:[0-5][0-9]/))
+		return "";
+	return substr(s, RSTART, RLENGTH);
+}
+
+BEGIN {
+	n_days = 0; # day index of entries before the first day change
+}
+
+# seed the date from the log header so that first-day entries are dated
+/^--- Log opened / {
+	extract_date($0);
+}
+
+# update date for subsequent entries
+/^--- Day changed / {
+	extract_date($0);
+	n_days++;
+}
+
+# print one row per tagged line, skipping channel mentions, lines that
+# are neither message nor action, and the annna bot
+/ #[A-Za-z0-9]+/ && !/#bitreich-en/ {
+	user = extract_user($0);
+	if (user == "" || user == "annna")
+		next;
+	printf("%s\t%s-%s-%s\t%s\t%s\t%s\n",
+		n_days, year, month, day,
+		extract_time($0), user, extract_tag($0));
+}