linregress (2493B)
1 #!/usr/bin/env python 2 import sys, getopt 3 import numpy 4 import scipy.stats 5 6 def usage(): 7 print("usage: {} [-h] [-o outfile] [-s significance_level]".format(sys.argv[0])) 8 9 def read2dstdin(): 10 input = numpy.loadtxt(sys.stdin) 11 return input[:,0], input[:, 1] 12 13 def reportregress(res): 14 print("slope =\t{:5g}\nslope_stderr =\t{:5g}".format(res.slope, res.stderr)) 15 print("intercept =\t{:5g}\nintercept_stderr =\t{:5g}".format(res.intercept, res.intercept_stderr)) 16 print("r_value =\t{:5g}\np_value =\t{:5g}\n".format(res.rvalue, res.pvalue)) 17 18 def reportsignif(p, significlvl): 19 print("Checking null hypothesis using the Wald Test with t-distribution of the test statistic.") 20 print("The p-value is ", end="") 21 if (p < significlvl): 22 print("less than the significance level ({:g}),".format(significlvl)) 23 print("which means that the null hypothesis of zero slope is REJECTED.\n") 24 else: 25 print("greater or equal than the significance level ({:g}),".format(significlvl)) 26 print("which means that the null hypothesis of zero slope CANNOT be rejected.\n") 27 28 def reportcorr(r): 29 print("The correlation coefficient (r-value) denotes ", end="") 30 if (abs(r) < 0.01): 31 print("zero correlation.") 32 else: 33 print("a ", end="") 34 if (abs(r) < 0.2): 35 print("weak", end="") 36 elif (abs(r) < 0.3): 37 print("moderate", end="") 38 elif (abs(r) < 0.4): 39 print("strong", end="") 40 elif (abs(r) < 0.7): 41 print("very strong", end="") 42 print(" positive" if r > 0.0 else " negative") 43 print(" relationship.") 44 45 def plotresult(x, y, res, outfile): 46 import matplotlib.pyplot as plt 47 plt.figure() 48 plt.scatter(x, y, label="data") 49 x_ = numpy.linspace(min(x), max(x)) 50 y_fit = res.slope * x_ + res.intercept 51 plt.title("p-value: {:.3g}, corr. coeff.: {:.3g}".format(res.pvalue, res.rvalue)) 52 plt.plot(x_, y_fit, "-k", label="slope: {:.3g}, intercept: {:.3g}".format(res.slope, res.intercept)) 53 plt.legend() 54 plt.tight_layout() 55 plt.savefig(outfile) 56 57 def main(argv): 58 significlvl = 0.05 59 outfile = '' 60 try: 61 opts, args = getopt.getopt(argv, "ho:s:") 62 except getopt.GetoptError: 63 usage() 64 sys.exit(2) 65 for opt, arg in opts: 66 if opt == "-h": 67 usage() 68 sys.exit(0) 69 elif opt == "-s": 70 significlvl = float(arg) 71 elif opt == "-o": 72 outfile = arg 73 74 x, y = read2dstdin() 75 res = scipy.stats.linregress(x, y, alternative="two-sided") 76 reportregress(res) 77 reportsignif(res.pvalue, significlvl) 78 reportcorr(res.rvalue) 79 if outfile: 80 plotresult(x, y, res, outfile) 81 82 if __name__ == "__main__": 83 main(sys.argv[1:])