numtools

perform numerical operations on vectors and matrices in unix pipes
git clone git://src.adamsgaard.dk/numtools # fast
git clone https://src.adamsgaard.dk/numtools.git # slow
Log | Files | Refs | README | LICENSE Back to index

linregress (2493B)


      1 #!/usr/bin/env python
      2 import sys, getopt
      3 import numpy
      4 import scipy.stats
      5 
      6 def usage():
      7 	print("usage: {} [-h] [-o outfile] [-s significance_level]".format(sys.argv[0]))
      8 
      9 def read2dstdin():
     10 	input = numpy.loadtxt(sys.stdin)
     11 	return input[:,0], input[:, 1]
     12 
     13 def reportregress(res):
     14 	print("slope =\t{:5g}\nslope_stderr =\t{:5g}".format(res.slope, res.stderr))
     15 	print("intercept =\t{:5g}\nintercept_stderr =\t{:5g}".format(res.intercept, res.intercept_stderr))
     16 	print("r_value =\t{:5g}\np_value =\t{:5g}\n".format(res.rvalue, res.pvalue))
     17 
     18 def reportsignif(p, significlvl):
     19 	print("Checking null hypothesis using the Wald Test with t-distribution of the test statistic.")
     20 	print("The p-value is ", end="")
     21 	if (p < significlvl):
     22 		print("less than the significance level ({:g}),".format(significlvl))
     23 		print("which means that the null hypothesis of zero slope is REJECTED.\n")
     24 	else:
     25 		print("greater or equal than the significance level ({:g}),".format(significlvl))
     26 		print("which means that the null hypothesis of zero slope CANNOT be rejected.\n")
     27 
     28 def reportcorr(r):
     29 	print("The correlation coefficient (r-value) denotes ", end="")
     30 	if (abs(r) < 0.01):
     31 		print("zero correlation.")
     32 	else:
     33 		print("a ", end="")
     34 		if (abs(r) < 0.2):
     35 			print("weak", end="")
     36 		elif (abs(r) < 0.3):
     37 			print("moderate", end="")
     38 		elif (abs(r) < 0.4):
     39 			print("strong", end="")
     40 		elif (abs(r) < 0.7):
     41 			print("very strong", end="")
     42 		print(" positive" if r > 0.0 else " negative")
     43 		print(" relationship.")
     44 
     45 def plotresult(x, y, res, outfile):
     46 	import matplotlib.pyplot as plt
     47 	plt.figure()
     48 	plt.scatter(x, y, label="data")
     49 	x_ = numpy.linspace(min(x), max(x))
     50 	y_fit = res.slope * x_ + res.intercept
     51 	plt.title("p-value: {:.3g}, corr. coeff.: {:.3g}".format(res.pvalue, res.rvalue))
     52 	plt.plot(x_, y_fit, "-k", label="slope: {:.3g}, intercept: {:.3g}".format(res.slope, res.intercept))
     53 	plt.legend()
     54 	plt.tight_layout()
     55 	plt.savefig(outfile)
     56 
     57 def main(argv):
     58 	significlvl = 0.05
     59 	outfile = ''
     60 	try:
     61 		opts, args = getopt.getopt(argv, "ho:s:")
     62 	except getopt.GetoptError:
     63 		usage()
     64 		sys.exit(2)
     65 	for opt, arg in opts:
     66 		if opt == "-h":
     67 			usage()
     68 			sys.exit(0)
     69 		elif opt == "-s":
     70 			significlvl = float(arg)
     71 		elif opt == "-o":
     72 			outfile = arg
     73 
     74 	x, y = read2dstdin()
     75 	res = scipy.stats.linregress(x, y, alternative="two-sided")
     76 	reportregress(res)
     77 	reportsignif(res.pvalue, significlvl)
     78 	reportcorr(res.rvalue)
     79 	if outfile:
     80 		plotresult(x, y, res, outfile)
     81 
     82 if __name__ == "__main__":
     83 	main(sys.argv[1:])