add stddev(1) and stdvar(1) - numtools - perform numerical operations on vectors and matrices in unix pipes

commit 8ea1920df26393631c033b2e3955d88e6e20eaa2
parent 19cc9e1694767e6534501210180a9e8882fb85ca
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date:   Mon,  9 May 2022 15:55:03 +0200

add stddev(1) and stdvar(1)

Diffstat:
M Makefile  | 4 ++++
A stddev.1  | 40 ++++++++++++++++++++++++++++++++++++++++
A stddev.c  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stdvar.1  | 40 ++++++++++++++++++++++++++++++++++++++++
A stdvar.c  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 212 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -15,6 +15,8 @@ BIN = \
 	randcounts \
 	range \
 	rangetest \
+	stddev \
+	stdvar \
 	sum \
 	transpose \
 
@@ -59,6 +61,8 @@ min: min.o
 randcounts: randcounts.o
 range: range.o
 rangetest: rangetest.o
+stddev: stddev.o
+stdvar: stdvar.o
 sum: sum.o
 transpose: transpose.o
 
diff --git a/stddev.1 b/stddev.1
@@ -0,0 +1,40 @@
+.Dd $Mdocdate$
+.Dt STDDEV 1
+.Os
+.Sh NAME
+.Nm stddev
+.Nd returns the standard deviation for each column
+.Sh SYNOPSIS
+.Nm
+.Op Fl u
+.Sh DESCRIPTION
+.Nm
+returns the corrected sample standard deviation (s) for each column of
+standard input.
+Input fields must be tab-separated and each line must contain the same
+number of fields.
+The output is always in full double precision.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl u
+Return the uncorrected sample standard deviation instead.
+.El
+.Sh EXAMPLES
+Compute the corrected standard deviation for some input numbers:
+.Pp
+.Dl $ printf '10\n8\n10\n8\n8\n4\n' | stddev
+.Dl 2.1908902300206643
+.Pp
+Same as the previous example, but return the uncorrected standard
+deviation:
+.Dl $ printf '10\n8\n10\n8\n8\n4\n' | stddev -u
+.Dl 2
+.Sh SEE ALSO
+.Xr max 1 ,
+.Xr mean 1 ,
+.Xr min 1 ,
+.Xr stdvar 1 ,
+.Xr sum 1
+.Sh AUTHORS
+.An Anders Damsgaard Aq Mt anders@adamsgaard.dk
diff --git a/stddev.c b/stddev.c
@@ -0,0 +1,64 @@
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "arg.h"
+#include "util.h"
+
+char *argv0;
+
+/*
+ * Print the usage synopsis to stderr and exit with status 1.
+ * errx(3) terminates its output with a newline on its own, so the
+ * format string must not end in one (it would emit a blank line).
+ */
+static void
+usage(void)
+{
+	errx(1, "usage: %s [-u]", argv0);
+}
+
+/*
+ * Read a matrix from stdin and print, for every column, the square
+ * root of the summed squared deviations from the column mean divided
+ * by (rows - ddof).  ddof is 1 by default and 0 when -u is given.
+ */
+int
+main(int argc, char *argv[])
+{
+	size_t col, row, ncols = 0, nrows = 0, ddof = 1;
+	double *avg = NULL, *dev = NULL, **data = NULL;
+	double delta;
+
+	if (pledge("stdio", NULL) == -1)
+		err(2, "pledge");
+
+	ARGBEGIN {
+	case 'u':
+		/* -u: divide by the row count itself */
+		ddof = 0;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	/* nrows/ncols are used below as the row and column bounds of data */
+	nrows = fscanmatrix(stdin, &data, &ncols);
+
+	avg = calloc(ncols, sizeof(double));
+	if (!avg)
+		err(1, "calloc");
+	dev = calloc(ncols, sizeof(double));
+	if (!dev)
+		err(1, "calloc");
+
+	/* columns are independent, so handle mean and deviation together */
+	for (col = 0; col < ncols; col++) {
+		avg[col] = 0.0;
+		for (row = 0; row < nrows; row++)
+			avg[col] += data[row][col];
+		avg[col] /= (double)nrows;
+
+		dev[col] = 0.0;
+		for (row = 0; row < nrows; row++) {
+			delta = data[row][col] - avg[col];
+			dev[col] += pow(delta, 2.0);
+		}
+		dev[col] = sqrt(dev[col] / ((double)(nrows - ddof)));
+	}
+
+	printarr(dev, ncols);
+
+	for (row = 0; row < nrows; row++)
+		free(data[row]);
+	free(data);
+	free(dev);
+	free(avg);
+
+	return 0;
+}
diff --git a/stdvar.1 b/stdvar.1
@@ -0,0 +1,40 @@
+.Dd $Mdocdate$
+.Dt STDVAR 1
+.Os
+.Sh NAME
+.Nm stdvar
+.Nd returns the standard variance for each column
+.Sh SYNOPSIS
+.Nm
+.Op Fl u
+.Sh DESCRIPTION
+.Nm
+returns the corrected sample standard variance (s²) for each column
+of standard input.
+Input fields must be tab-separated and each line must contain the same
+number of fields.
+The output is always in full double precision.
+.Pp
+The options are as follows:
+.Bl -tag -width Ds
+.It Fl u
+Return the uncorrected sample standard variance instead.
+.El
+.Sh EXAMPLES
+Compute the corrected standard variance for some input numbers:
+.Pp
+.Dl $ printf '10\n8\n10\n8\n8\n4\n' | stdvar
+.Dl 4.7999999999999998
+.Pp
+Same as the previous example, but return the uncorrected standard
+variance:
+.Dl $ printf '10\n8\n10\n8\n8\n4\n' | stdvar -u
+.Dl 4
+.Sh SEE ALSO
+.Xr max 1 ,
+.Xr mean 1 ,
+.Xr min 1 ,
+.Xr stddev 1 ,
+.Xr sum 1
+.Sh AUTHORS
+.An Anders Damsgaard Aq Mt anders@adamsgaard.dk
diff --git a/stdvar.c b/stdvar.c
@@ -0,0 +1,64 @@
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "arg.h"
+#include "util.h"
+
+char *argv0;
+
+/*
+ * Print the usage synopsis to stderr and exit with status 1.
+ * errx(3) terminates its output with a newline on its own, so the
+ * format string must not end in one (it would emit a blank line).
+ */
+static void
+usage(void)
+{
+	errx(1, "usage: %s [-u]", argv0);
+}
+
+/*
+ * Read a matrix from stdin and print, for every column, the summed
+ * squared deviations from the column mean divided by (rows - ddof).
+ * ddof is 1 by default and 0 when -u is given.
+ */
+int
+main(int argc, char *argv[])
+{
+	size_t col, row, ncols = 0, nrows = 0, ddof = 1;
+	double *avg = NULL, *var = NULL, **data = NULL;
+	double delta;
+
+	if (pledge("stdio", NULL) == -1)
+		err(2, "pledge");
+
+	ARGBEGIN {
+	case 'u':
+		/* -u: divide by the row count itself */
+		ddof = 0;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	/* nrows/ncols are used below as the row and column bounds of data */
+	nrows = fscanmatrix(stdin, &data, &ncols);
+
+	avg = calloc(ncols, sizeof(double));
+	if (!avg)
+		err(1, "calloc");
+	var = calloc(ncols, sizeof(double));
+	if (!var)
+		err(1, "calloc");
+
+	/* columns are independent, so handle mean and variance together */
+	for (col = 0; col < ncols; col++) {
+		avg[col] = 0.0;
+		for (row = 0; row < nrows; row++)
+			avg[col] += data[row][col];
+		avg[col] /= (double)nrows;
+
+		var[col] = 0.0;
+		for (row = 0; row < nrows; row++) {
+			delta = data[row][col] - avg[col];
+			var[col] += pow(delta, 2.0);
+		}
+		var[col] /= (double)(nrows - ddof);
+	}
+
+	printarr(var, ncols);
+
+	for (row = 0; row < nrows; row++)
+		free(data[row]);
+	free(data);
+	free(var);
+	free(avg);
+
+	return 0;
+}

	numtools perform numerical operations on vectors and matrices in unix pipes
	git clone git://src.adamsgaard.dk/numtools # fast git clone https://src.adamsgaard.dk/numtools.git # slow
	Log \| Files \| Refs \| README \| LICENSE	Back to index

M	Makefile	\|	4	++++
A	stddev.1	\|	40	++++++++++++++++++++++++++++++++++++++++
A	stddev.c	\|	64	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	stdvar.1	\|	40	++++++++++++++++++++++++++++++++++++++++
A	stdvar.c	\|	64	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++