commit d08ff10b40479174b132e313726b406692972e35
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Fri, 17 Apr 2020 09:55:02 +0200
Add working script extracting data from log
Diffstat:
2 files changed, 75 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,9 @@
+# irssi log of the channel; the backslash keeps make from treating the
+# channel name's hash sign as the start of a comment
+log = ~/.irssi/log/Freenode/\#bitreich-en.log
+
+# regenerated only when the awk script or the log is newer than the output
+memeuse.tsv: extract_memeuse.awk $(log)
+	awk -f extract_memeuse.awk $(log) > $@
+
+# remove the generated table
+clean:
+	rm -f memeuse.tsv
+
+# clean is a command, not a file.  memeuse.tsv is a real file and is
+# deliberately not phony, otherwise it would be rebuilt on every run.
+.PHONY: clean
diff --git a/extract_memeuse.awk b/extract_memeuse.awk
@@ -0,0 +1,66 @@
+#!/usr/bin/awk -f
+# get timestamp, user, and tag from irssi log in default format
+
+# Parse an irssi "--- Day changed <weekday> <month> <day> <year>" line and
+# store the result in the globals day, month and year as strings
+# (e.g. "17", "04", "2020").  Returns nothing; pos is a local.
+#
+# The day is taken from a standalone one- or two-digit number and
+# zero-padded, so a non-padded day ("Apr 7 2020") is not confused with the
+# first two digits of the year.  If no such number exists, the first two
+# consecutive digits are used.  If no English month abbreviation followed
+# by a space is found, month is left as the remaining date text.
+function extract_date(s,    pos) {
+	gsub(/--- Day changed ... /, "", s);
+
+	day = "";
+	if (match(s, /(^| )[0-9][0-9]?( |$)/)) {
+		day = substr(s, RSTART, RLENGTH);
+		gsub(/ /, "", day);
+		if (length(day) == 1)
+			day = "0" day;
+	} else if (match(s, /[0-9][0-9]/)) {
+		day = substr(s, RSTART, RLENGTH);
+	}
+
+	match(s, /[0-9][0-9][0-9][0-9]/);
+	year = substr(s, RSTART, RLENGTH);
+
+	month = s;
+	if (match(s, /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) /)) {
+		# each name starts at offset 1, 4, 7, ... of the lookup string
+		pos = index("JanFebMarAprMayJunJulAugSepOctNovDec",
+		            substr(s, RSTART, 3));
+		month = sprintf("%02d", (pos + 2) / 3);
+	}
+}
+
+# Return the nick that wrote log line s (star is a local).
+#
+# Regular messages look like "HH:MM < nick> text" or "HH:MM <@nick> text":
+# the nick is what follows the '<' and the one-character mode prefix, up to
+# the first '>'.  Actions look like "HH:MM  * nick text": the nick is the
+# word after the first "* ".  Whichever marker comes first in the line
+# decides the type, so '<', '>' or '*' inside the message text cannot be
+# mistaken for the nick delimiters.  For any other line the first
+# space-delimited word is returned.
+function extract_user(s,    star) {
+	star = index(s, "* ");
+	if (match(s, /<[^>]*>/) && (star == 0 || RSTART < star)) {
+		# regular message: skip '<' and the mode character, drop '>'
+		return substr(s, RSTART + 2, RLENGTH - 3);
+	}
+	if (star > 0) {
+		# action: keep what follows the first "* "
+		s = substr(s, star + 2);
+	}
+	sub(/ .*/, "", s);
+	return s;
+}
+
+# Return the first hashtag in s: a '#' that is preceded by a space and
+# followed by at least one letter or digit, extended up to the next space.
+# A '#' inside a word (e.g. a URL fragment) is not a tag.  Returns "" when
+# s holds no tag.
+function extract_tag(s) {
+	if (!match(s, / #[A-Za-z0-9][^ ]*/))
+		return "";
+	# skip the leading space that is part of the match
+	return substr(s, RSTART + 1, RLENGTH - 1);
+}
+
+# Return the first HH:MM timestamp found in s, or "" when there is none.
+function extract_time(s) {
+	if (match(s, /[0-2][0-9]:[0-5][0-9]/) == 0)
+		return "";
+	return substr(s, RSTART, RLENGTH);
+}
+
+# On a day-change marker, refresh the global date (day, month, year) used
+# for subsequent entries and count the marker in n_days.
+/--- Day changed / {
+	extract_date($0);
+	n_days++;
+}
+
+# For every line containing a hashtag, print one tab-separated record:
+#   n_days, date as year-month-day, time, user, tag
+# Lines containing "< annna>" or "#bitreich-en" are skipped.
+/ #[A-Za-z0-9]+/ {
+	if (! /< annna>/ && ! /#bitreich-en/) {
+		# plain '-' in the format: "\-" is not a defined awk escape
+		# sequence, and some implementations print the backslash
+		printf("%s\t%s-%s-%s\t%s\t%s\t%s\n",
+		       n_days, year, month, day,
+		       extract_time($0), extract_user($0), extract_tag($0));
+	}
+}