-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathRfunctions.py
132 lines (124 loc) · 4.83 KB
/
Rfunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Rfunctions.py: mimics functionality of R for locfdr.py
# Part of locfdr-python, http://www.github.com/buci/locfdr-python/
#
# Copyright (C) 2013 Abhinav Nellore (anellore@gmail.com)
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License v2 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details
#
# You should have received a copy of the GNU General Public License
# along with this program in the file COPYING. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
try:
import numpy as np
except ImportError:
print 'numpy is required, but it was not found. Rfunctions was tested on numpy 1.7.1.'
raise
try:
import scipy as sp
from scipy.linalg import qr
from scipy.interpolate import splev
except ImportError:
print 'scipy is required, but it was not found. Rfunctions was tested on scipy 0.12.0.'
raise
class InputError(Exception):
"""Exception raised for errors in the input."""
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
def splineDesign(knots, x, ord = 4, der = 0):
"""Reproduces behavior of R function splineDesign() for use by ns(). See R documentation for more information.
Python code uses scipy.interpolate.splev to get B-spline basis functions, while R code calls C.
Note that der is the same across x."""
knots = np.array(knots, dtype=np.float64)
x = np.array(x, dtype=np.float64)
knots.sort()
assert max(x) <= max(knots) and min(x) >= min(knots)
m = len(knots) - ord
v = np.zeros((m, len(x)))
d = np.eye(m, len(knots))
for i in range(m):
v[i] = splev(x, (knots, d[i], ord-1), der = der)
return v.transpose()
def ns(x, df = 1):
"""Reproduces only that output of the R function ns() necessary for locfdr(). See R documentation for more information."""
x = np.array(x)
assert df >= 1
knots = np.arange(0, 1, 1./df)[1:]
knots = [np.percentile(x,el*100) for el in knots]
knots = np.array([min(x)]*4 + knots + [max(x)]*4)
out = np.matrix(splineDesign(knots, x, 4)[:, 1:])
const = np.matrix(splineDesign(knots, [min(x), max(x)], 4, 2)[:, 1:])
QRdec = qr(const.transpose(), mode='full')
# note that basis may be different from that reported by R because the QR
# decomposition algo used by scipy is different.
# of course, it gives same GLM fit.
return (out * QRdec[0])[:, 2:]
def poly(x, df = 1):
"""Reproduces only that output of R function poly() necessary for locfdr(). See R documentation for more information."""
x = np.array(x)
if df < 1:
raise InputError('df must be > 0')
if df >= len(set(x)):
raise InputError('df must be >= bre')
xbar = np.mean(x)
x = x - xbar
X = np.matrix([[float(x[i])**j for i in xrange(len(x))] for j in xrange(df+1)]).transpose()
# this is required in R function; not sure it's necessary to check
assert np.linalg.matrix_rank(X) >= df
QRdec = qr(X, mode='full')
z = np.zeros(QRdec[1].shape)
for i in range(min(z.shape)):
z[i,i] = QRdec[1][i,i]
raw = QRdec[0] * sp.matrix(z)
norm2 = np.sum(np.power(raw,2), axis=0)
Z = raw / np.sqrt(norm2).repeat(len(x)).reshape(raw.shape[1], raw.shape[0]).transpose()
return Z[:,1:]
def approx(x, y, xout, rule = 1, ties = 'ordered', tieprecision = 10):
"""Mimics R function approx(). ties can be either 'ordered' or 'mean'.
'ordered' breaks ties by distributing tied x values at intervals of 1e(-tieprecision).
This way, given some x-value X with ties, the interpolating function approaches the max y-value
at x from the right and the min y value at x from the left. tieprecision is ignored if ties = 'mean.'"""
x = np.array(x, dtype=np.float64)
y = np.array(y, dtype=np.float64)
xout = np.array(xout, dtype=np.float64)
holder = {}
for i,el in enumerate(x):
if not holder.has_key(el):
holder[el] = []
holder[el].append(y[i])
together = []
if ties != 'ordered':
for key in holder:
together.append([key, np.mean(holder[key])])
else:
for key in holder:
reps = len(holder[key])
holder[key].sort()
for i,el in enumerate(holder[key]):
together.append([key-(reps-i-1)*np.power(10., -tieprecision), el])
together.sort(key=lambda lam: lam[0])
together = np.array(together).transpose()
vals = sp.interpolate.interp1d(together[0, :], together[1, :], bounds_error = False)(xout)
if rule == 2:
try:
for i,el in enumerate(vals):
if el > together[0, -1]:
vals[i] = together[1, -1]
elif el < together[0, 0]:
vals[i] = together[1, 0]
except TypeError:
if vals > together[0, -1]:
vals = together[1, -1]
elif vals < together[0, 0]:
vals = together[0, 0]
return vals