Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Baluev method plus an notebook example #14

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100,000 changes: 100,000 additions & 0 deletions examples/all_gaussian.dat

Large diffs are not rendered by default.

261 changes: 261 additions & 0 deletions examples/baluev_method_for_false_alarm_probability.ipynb

Large diffs are not rendered by default.

238 changes: 238 additions & 0 deletions examples/linear_faps.ipynb

Large diffs are not rendered by default.

Binary file added examples/linear_faps.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
110 changes: 110 additions & 0 deletions examples/linear_faps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#Author: Nicholas Hunt-Walker
#Date: 9/23/2015
#License: BSD
#Purpose: We calculate False Alarm Probabilities (FAPs) for periods measured for
# LINEAR objects. The calculation of FAPs is derived from Baluev 2008.
# Once calculated, we want to visualize the distribution of FAPs for
# LINEAR objects.
import numpy as np

import matplotlib.pyplot as plt

from gatspy.periodic import LombScargle, LombScargleFast

from astroML.datasets import fetch_LINEAR_sample
from astroML.time_series import search_frequencies, lomb_scargle, MultiTermFit
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=9, usetex=True)

#------------------------------------------------------------
#Load the dataset
data = fetch_LINEAR_sample()
ids = data.ids

#------------------------------------------------------------
#Compute the best frequencies
def compute_best_frequencies(ids, quiet=True):
results = {}
for i in ids:
t, y, dy = data[i].T

ls = LombScargleFast().fit(t, y, dy)
ls.optimizer.quiet=quiet
ls.optimizer.period_range = (10**-1.5, 10)

periods = ls.find_best_periods()
fap = ls.false_alarm_max()
Npoints = len(t)
scores = ls.score(periods)

results[i] = [periods, fap, Npoints, scores]

return results

results = compute_best_frequencies(ids)

faps = np.array([results[ii][1] for ii in ids])
# quartile = np.percentile(faps, [75,25])
# quartile_width = quartile[0] - quartile[1]
# binwidth = 2 * quartile_width / (faps.size)**(1./3)

fig = plt.figure(figsize=(8,8))
fig.subplots_adjust(bottom=0.15)
titstr = "Distribution of False Alarm Probabilities for {0} LINEAR Objects"

ax = plt.subplot(211)
ax.set_title(titstr.format(sum(faps > 0)) + " with FAP $>$ 0")
ax.hist(faps[faps > 0], bins=np.linspace(0,1.0,101))
ax.set_ylabel("N")
ax.set_xlim(0,1)
ax.minorticks_on()

ax = plt.subplot(212)
ax.set_title(titstr.format(faps.size) + "; all")
ax.hist(faps, bins=np.linspace(0,1.0,21))
ax.set_xlabel("Maximum False Alarm Probability")
ax.set_ylabel("N")
ax.set_xlim(0,1)
ax.minorticks_on()

plt.show()

fout = "/Users/Nick/Documents/my_python/mystuff/astroML_testbed/faps.dat"
f = open(fout, 'w')
# want periods 1-5 and the false alarm probability

f.write("ids,p0,p1,p2,p3,p4,faps,npoints,scores\n")
for ii in ids:
the_id = str(ii)
periods = ",".join(str(results[ii][0]).strip("[").strip("]")[1:].strip().split())
the_fap = str(results[ii][1])
the_points = str(results[ii][2])
the_scores = str(results[ii][3])
outline = ",".join([the_id,periods,the_fap,the_points,the_scores]) + "\n"
f.write(outline)

f.close()

Npoints = np.zeros(ids.size)
for ii in range(len(ids)):
Npoints[ii] = len(data[ids[ii]].T[0])

plt.figure(figsize=(8,4))
plt.hist(Npoints, bins=np.arange(30, 1170, 20))
plt.xlim(30, 1170)
plt.xlabel("$N_observations$")
plt.ylabel("$N_objects$")
plt.minorticks_on()
plt.show()

# =======
# want to find out why so many fap values are == 0
# start by finding out characteristics of FAP values > 0.5
bad_ids = ids[faps > 0.5]
good_ids = ids[faps < 0.1]

# t, y, dy = data[good_ids[10]].T
t, y, dy = data[bad_ids[10]].T
plt.errorbar(t, y, dy, fmt=".k", ecolor="gray",
ms=4, lw=1, capsize=1.5)
plt.show()
74 changes: 74 additions & 0 deletions gatspy/periodic/lomb_scargle.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import warnings

import numpy as np
from scipy.special import gamma

from .modeler import PeriodicModeler

Expand Down Expand Up @@ -204,6 +205,79 @@ def _predict(self, t, period):
def _score(self, periods):
return LeastSquaresMixin._score(self, periods)

def false_alarm_single(self, periods, logarithmic=False):
"""
Calculate the False Alarm Probability for every period
provided by the input param.
Following the method outlined in Baluev 2008, Table 1.

Note that this is an upper limit on the False Alarm Probability,
and that it assumes low spectral leakage when in reality it's usually
an issue.

If 'logarithmic'=True, false_alarm_single returns log10(FAP).
Else, false_alarm_single returns FAP.
"""
t = self.t
y = self.y
dy = self.dy
N = t.size
Nh = N - 1
Nk = N - 3

# In Baluev 2008, z = z_scaled * Nh / 2
z_scaled = self.score(periods)

if logarithmic:
log_fap = (0.5 * Nk) * np.log10(1 - z_scaled)
return log_fap

else:
fap = (1 - z_scaled) ** (0.5 * Nk)
return fap

def false_alarm_max(self):
"""
Calculate the False Alarm Probability for the best period.
Following the method outlined in Baluev 2008, Table 1 and
eq. 5

Note that this is an upper limit on the False Alarm Probability,
and that it assumes low spectral leakage when in reality it's usually
an issue.

For FAP values near 0 (< 1E-8), will return 0.0.
"""
t = self.t
y = self.y
dy = self.dy
N = t.size
Nh = N - 1
Nk = N - 3
best_period = self.best_period

z = self.score(best_period) * 0.5 * Nh
fmax = 1 / min(self.optimizer.period_range)

if Nh < 10:
gamma_H = np.sqrt(2. / Nh) * gamma(0.5 * Nh) / gamma(0.5 * (Nh - 1))
else:
gamma_H = 1

def avg(phi, dy):
return np.sum(phi / dy ** 2)
def bar(phi, dy):
return avg(phi, dy) / avg(np.ones(N), dy)

var_t = bar(t ** 2, dy) - bar(t, dy) ** 2
W = fmax * np.sqrt(4 * np.pi * var_t)
tau = gamma_H * W * (1-2 * z / Nh) ** (0.5 * (Nk - 1)) * np.sqrt(z)

Psingle = 1 - self.false_alarm_single(best_period)
Pmax = Psingle * np.exp(-tau)

return 1 - Pmax


class LombScargleAstroML(LombScargle):
"""Lomb-Scargle Periodogram Implementation using AstroML
Expand Down
13 changes: 11 additions & 2 deletions gatspy/periodic/modeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def fit(self, t, y, dy=None):

if self.fit_period:
self._best_period = self._calc_best_period()

return self

def predict(self, t, period=None):
Expand Down Expand Up @@ -132,7 +132,7 @@ def score(self, periods=None):
Parameters
----------
periods : float or array_like
Array of angular frequencies at which to compute
Array of periods at which to compute
the periodogram.

Returns
Expand Down Expand Up @@ -180,6 +180,15 @@ def _predict(self, t, period):
"""Predict the model values at the given times"""
raise NotImplementedError()

def _false_alarm_single(self, periods):
"""Calculate the False Alarm Probability for every period
provided by the input param."""
raise NotImplementedError()

def _false_alarm_max(self):
"""Calculate the False Alarm Probability for the best period."""
raise NotImplementedError()


class PeriodicModelerMultiband(PeriodicModeler):
"""Base class for periodic modeling on multiband data"""
Expand Down
21 changes: 21 additions & 0 deletions gatspy/periodic/tests/test_false_alarm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import division
import warnings

import numpy as np
from numpy.testing import assert_allclose, assert_equal, assert_almost_equal

from .. import LombScargle

def test_lombscargle_fap(){
t = np.linspace(0, 100, 1000)
np.random.seed(4)
y = np.random.normal(0, 1.0, t.size)

fmax = 500/t.max()

ls = LombScargle().fit(t, y, 1.0)
ls.optimizer.quiet = True
ls.optimizer.period_range = (1/fmax, t.max())

assert_almost_equal(ls.false_alarm_max(), 0.17453, decimal=3)
}