pg_orrery/src/de_reader.c
Ryan Malloy 9e420a1fc9 Harden DE reader: layout validation, Chebyshev clamping, FD safety
Three independent reviews (manual, failure-mode analysis, JPL spec
cross-reference) confirmed the mathematical core is correct. This
commit addresses defensive coding and operational behavior:

- Fix header byte-offset comments (12 groups = 144 bytes, not 156)
- Add layout validation before Chebyshev interpolation (prevent
  buffer underread for bodies absent from a DE edition)
- Clamp Chebyshev argument to [-1,+1] with debug assertion for
  values beyond 1e-10 tolerance (catches structural normalization
  errors vs normal FP boundary rounding)
- Add O_CLOEXEC to prevent FD leaks to child processes
- Change GUC from PGC_SIGHUP to PGC_BACKEND to match actual
  one-shot initialization behavior
- Fix provider consistency: planet_velocity_de() now accepts a
  use_de flag to match the provider used for positions (rule 7)
- Optimize eph_de_moon() to use raw geocentric Moon (center=-1)
  instead of computing Earth just to subtract it back out
- Pre-compute obliquity trig constants (verified to full precision)
- Tighten canary check from 0.9-1.1 AU to 0.97-1.04 AU
- Return NAN for missing constants (0.0 was ambiguous)
- Add _Static_assert for sizeof(double) == 8
- Remove unused HDR_* macros
- Zero Datum values before setting null flags in ephemeris_info
- Replace magic numbers with DE_MOON/DE_SUN constants

All 13 regression tests pass. Zero compiler warnings.
2026-02-17 00:20:31 -07:00

695 lines
17 KiB
C

/*
* de_reader.c -- Clean-room JPL Development Ephemeris reader
*
* Reads JPL DE binary format (DE430/DE440/DE441).
* No GPL dependency: derived from the public format specification.
*
* The JPL binary ephemeris consists of fixed-size records:
* Record 1: Header (titles, constant names, JD range, layout table)
* Record 2: Constant values
* Records 3+: Chebyshev coefficients for all bodies
*
* Each data record covers a time interval (32 days for DE441).
* Within a record, each body has Chebyshev coefficients for x,y,z
* (or longitude/latitude/distance), possibly split into sub-intervals.
*
* Evaluation uses the Clenshaw recurrence for Chebyshev polynomials.
*
* Reference:
* JPL IOM 312.N-03-009 (Standish 1998)
* "Explanatory Supplement to the Astronomical Almanac" Ch. 8
*/
#include "de_reader.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <fcntl.h>
/* swap_double() assumes 8-byte doubles (IEEE 754 on all platforms PG supports) */
_Static_assert(sizeof(double) == 8, "DE reader requires 8-byte doubles");
/* Known AU value for byte-order detection */
#define DE_AU_KNOWN 149597870.700
/*
* Byte-swap a double (for big-endian DE files on little-endian hosts).
*/
static void
swap_double(double *val)
{
unsigned char *p = (unsigned char *)val;
unsigned char tmp;
tmp = p[0]; p[0] = p[7]; p[7] = tmp;
tmp = p[1]; p[1] = p[6]; p[6] = tmp;
tmp = p[2]; p[2] = p[5]; p[5] = tmp;
tmp = p[3]; p[3] = p[4]; p[4] = tmp;
}
/*
* Swap an array of doubles in-place.
*/
static void
swap_doubles(double *arr, int count)
{
int i;
for (i = 0; i < count; i++)
swap_double(&arr[i]);
}
/*
* Read exactly n bytes from fd at the given offset.
* Returns 0 on success, -1 on failure.
*/
static int
read_at(int fd, void *buf, size_t n, off_t offset)
{
ssize_t total = 0;
ssize_t got;
if (lseek(fd, offset, SEEK_SET) == (off_t)-1)
return -1;
while ((size_t)total < n)
{
got = read(fd, (char *)buf + total, n - total);
if (got <= 0)
return -1;
total += got;
}
return 0;
}
/*
* Evaluate a Chebyshev polynomial using the Clenshaw recurrence.
*
* coeffs: array of Chebyshev coefficients (T0, T1, ..., T_{n-1})
* n: number of coefficients
* x: normalized argument in [-1, +1]
*
* Returns: sum_{i=0}^{n-1} coeffs[i] * T_i(x)
*/
static double
chebyshev_eval(const double *coeffs, int n, double x)
{
double bk1 = 0.0, bk2 = 0.0, bk;
int i;
for (i = n - 1; i >= 1; i--)
{
bk = 2.0 * x * bk1 - bk2 + coeffs[i];
bk2 = bk1;
bk1 = bk;
}
return x * bk1 - bk2 + coeffs[0];
}
/*
* Interpolate a single component (x, y, or z) for a body at a JD.
*
* h: reader handle (with record already loaded in record_buf)
* body: body group index (0-12)
* comp: component (0=x, 1=y, 2=z)
* jd: Julian date
*
* Returns the interpolated value.
*/
static double
interp_component(de_handle *h, int body, int comp, double jd)
{
de_body_layout *lay = &h->layout[body];
double rec_start, sub_length, t_sub, x;
int sub_idx, coeff_offset;
/* Record start JD */
rec_start = h->record_buf[0];
/* Sub-interval length in days */
sub_length = h->interval_days / lay->nsub;
/* Which sub-interval? */
t_sub = jd - rec_start;
sub_idx = (int)(t_sub / sub_length);
if (sub_idx >= lay->nsub)
sub_idx = lay->nsub - 1;
if (sub_idx < 0)
sub_idx = 0;
/* Normalize to [-1, +1] within the sub-interval */
x = 2.0 * (t_sub - sub_idx * sub_length) / sub_length - 1.0;
/* Clamp to [-1, +1]: floating-point arithmetic at interval
* boundaries can produce |x| slightly > 1.0, which causes
* Chebyshev polynomials to diverge. Legitimate rounding should
* never exceed ~1e-14; anything larger indicates a structural
* arithmetic error in the normalization above. */
if (x > 1.0)
{
assert(x < 1.0 + 1e-10);
x = 1.0;
}
if (x < -1.0)
{
assert(x > -1.0 - 1e-10);
x = -1.0;
}
/*
* Coefficient offset in the record buffer.
* Layout offset is 1-based (Fortran convention), convert to 0-based.
* Each sub-interval has ncoeff coefficients for each of 3 components.
*/
coeff_offset = (lay->offset - 1)
+ sub_idx * lay->ncoeff * 3
+ comp * lay->ncoeff;
return chebyshev_eval(&h->record_buf[coeff_offset], lay->ncoeff, x);
}
/*
* Load the record containing the given JD into the handle's buffer.
* Returns DE_OK or DE_ERR_*.
*/
static int
load_record(de_handle *h, double jd)
{
int recno;
off_t offset;
if (jd < h->start_jd || jd > h->end_jd)
return DE_ERR_RANGE;
recno = (int)((jd - h->start_jd) / h->interval_days);
/* Clamp to last record if jd == end_jd */
{
int max_rec = (int)((h->end_jd - h->start_jd) / h->interval_days) - 1;
if (recno > max_rec)
recno = max_rec;
}
/* Already cached? */
if (recno == h->cached_recno)
return DE_OK;
/* Seek and read the record (skip 2 header records) */
offset = h->data_offset + (off_t)recno * h->record_bytes;
if (read_at(h->fd, h->record_buf, h->record_bytes, offset) != 0)
return DE_ERR_READ;
if (h->swap_bytes)
swap_doubles(h->record_buf, h->ncoeff);
h->cached_recno = recno;
return DE_OK;
}
/*
* Parse the header and validate the ephemeris file.
* Supports DE405 and later (DE430, DE440, DE441).
* Earlier formats have different header sizes and are not supported.
*/
static int
parse_header(de_handle *h)
{
/*
* The header spans 2 records of ncoeff doubles each.
* But we don't know ncoeff yet — it's at a fixed position
* in the first record.
*
* Header layout (byte offsets for first record):
* 0..251: 3 title lines (84 chars each)
* 252..2651: 400 constant names (6 chars each)
* 2652..2675: SS[3] = {start_jd, end_jd, interval_days}
* 2676..2679: NCON (int32)
* 2680..2687: AU (double)
* 2688..2695: EMRAT (double)
* 2696..2839: IPT[12][3] = 12 body groups x 3 ints (offset, ncoeff, nsub)
* 2840..2843: DE version number (int32)
* 2844..2855: IPT[12][3] for 13th body (librations)
*
* After IPT parsing, we know ncoeff and can size the record buffer.
*
* All byte offsets assume the first record starts at byte 0.
* The record size in bytes = ncoeff * 8.
*/
unsigned char buf[4096]; /* DE405+ header fits in 4096 bytes */
double ss[3];
double au_val;
int32_t ncon;
int ipt[13][3];
int32_t de_ver;
int i, j;
int max_offset_needed;
/* Read the first 4096 bytes of the file */
if (read_at(h->fd, buf, 4096, 0) != 0)
return DE_ERR_READ;
/* SS: start_jd, end_jd, interval (at byte 2652) */
memcpy(ss, buf + 2652, 3 * sizeof(double));
/* AU (at byte 2680) */
memcpy(&au_val, buf + 2680, sizeof(double));
/*
* Byte-order detection: compare AU against known value.
* If it doesn't match, try byte-swapping.
*/
h->swap_bytes = 0;
if (fabs(au_val - DE_AU_KNOWN) > 1.0)
{
swap_double(&au_val);
if (fabs(au_val - DE_AU_KNOWN) > 1.0)
return DE_ERR_ENDIAN;
h->swap_bytes = 1;
swap_doubles(ss, 3);
}
h->start_jd = ss[0];
h->end_jd = ss[1];
h->interval_days = ss[2];
h->au_km = au_val;
/* NCON at byte 2676 */
memcpy(&ncon, buf + 2676, sizeof(int32_t));
if (h->swap_bytes)
{
unsigned char *p = (unsigned char *)&ncon;
unsigned char tmp;
tmp = p[0]; p[0] = p[3]; p[3] = tmp;
tmp = p[1]; p[1] = p[2]; p[2] = tmp;
}
/* EMRAT at byte 2688 */
memcpy(&h->emrat, buf + 2688, sizeof(double));
if (h->swap_bytes)
swap_double(&h->emrat);
/* IPT: 12 body groups at byte 2696, each 3 x int32 */
for (i = 0; i < 12; i++)
{
int32_t vals[3];
memcpy(vals, buf + 2696 + i * 12, 12);
if (h->swap_bytes)
{
for (j = 0; j < 3; j++)
{
unsigned char *p = (unsigned char *)&vals[j];
unsigned char tmp;
tmp = p[0]; p[0] = p[3]; p[3] = tmp;
tmp = p[1]; p[1] = p[2]; p[2] = tmp;
}
}
ipt[i][0] = vals[0];
ipt[i][1] = vals[1];
ipt[i][2] = vals[2];
}
/* DE version at byte 2840 */
memcpy(&de_ver, buf + 2840, sizeof(int32_t));
if (h->swap_bytes)
{
unsigned char *p = (unsigned char *)&de_ver;
unsigned char tmp;
tmp = p[0]; p[0] = p[3]; p[3] = tmp;
tmp = p[1]; p[1] = p[2]; p[2] = tmp;
}
h->de_version = de_ver;
/* IPT[12] (librations) at byte 2844 */
{
int32_t vals[3];
memcpy(vals, buf + 2844, 12);
if (h->swap_bytes)
{
for (j = 0; j < 3; j++)
{
unsigned char *p = (unsigned char *)&vals[j];
unsigned char tmp;
tmp = p[0]; p[0] = p[3]; p[3] = tmp;
tmp = p[1]; p[1] = p[2]; p[2] = tmp;
}
}
ipt[12][0] = vals[0];
ipt[12][1] = vals[1];
ipt[12][2] = vals[2];
}
/* Store layout and compute ncoeff */
max_offset_needed = 0;
for (i = 0; i < DE_NUM_BODIES; i++)
{
h->layout[i].offset = ipt[i][0];
h->layout[i].ncoeff = ipt[i][1];
h->layout[i].nsub = ipt[i][2];
if (ipt[i][0] > 0 && ipt[i][1] > 0 && ipt[i][2] > 0)
{
int end;
int ncomp = (i == DE_NUTATION) ? 2 : 3;
end = (ipt[i][0] - 1) + ipt[i][1] * ncomp * ipt[i][2];
if (end > max_offset_needed)
max_offset_needed = end;
}
}
/*
* ncoeff: the header record size in doubles.
* The record must be large enough to hold all coefficient data
* plus the 2-double JD range at the start of each data record.
*/
h->ncoeff = max_offset_needed;
if (h->ncoeff < 2)
return DE_ERR_HEADER;
h->record_bytes = h->ncoeff * sizeof(double);
/* Data starts after 2 header records */
h->data_offset = 2L * h->record_bytes;
/* Validate: start_jd < end_jd, interval > 0 */
if (h->start_jd >= h->end_jd || h->interval_days <= 0.0)
return DE_ERR_HEADER;
return DE_OK;
}
/*
* Canary validation: evaluate Earth at J2000.0 and check against
* known DE441 reference position.
*
* Expected Earth ICRS position at J2000.0 (JD 2451545.0):
* x ~ -0.1771 AU, y ~ 0.8873 AU, z ~ 0.3848 AU
*
* Tolerance: 0.01 AU (very loose — just catches gross file corruption).
*/
static int
canary_check(de_handle *h)
{
double pos[3];
int err;
err = de_reader_get_pos(h, 2451545.0, DE_EMB, DE_SUN, pos);
if (err != DE_OK)
return DE_ERR_CANARY;
/* Earth-Sun distance varies 0.983-1.017 AU over the year.
* Tight tolerance catches garbled files that 0.9-1.1 would miss. */
{
double dist = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]);
if (dist < 0.97 || dist > 1.04)
return DE_ERR_CANARY;
}
return DE_OK;
}
de_handle *
de_reader_open(const char *path, int *errcode)
{
de_handle *h;
int err;
h = (de_handle *)calloc(1, sizeof(de_handle));
if (!h)
{
*errcode = DE_ERR_OPEN;
return NULL;
}
h->fd = -1;
h->cached_recno = -1;
h->record_buf = NULL;
/* Open the ephemeris file (read-only, close-on-exec to prevent FD
* leaks to child processes such as COPY ... PROGRAM or archiving) */
h->fd = open(path, O_RDONLY | O_CLOEXEC);
if (h->fd < 0)
{
*errcode = DE_ERR_OPEN;
free(h);
return NULL;
}
/* Parse header */
err = parse_header(h);
if (err != DE_OK)
{
*errcode = err;
close(h->fd);
free(h);
return NULL;
}
/* Allocate record buffer */
h->record_buf = (double *)calloc(h->ncoeff, sizeof(double));
if (!h->record_buf)
{
*errcode = DE_ERR_OPEN;
close(h->fd);
free(h);
return NULL;
}
/* Canary check */
err = canary_check(h);
if (err != DE_OK)
{
*errcode = err;
free(h->record_buf);
close(h->fd);
free(h);
return NULL;
}
*errcode = DE_OK;
return h;
}
/*
* Get raw body position (3 components) from the ephemeris.
* No center subtraction. Returns position in AU (ICRS equatorial).
*/
static int
get_raw_pos(de_handle *h, double jd, int body, double pos[3])
{
int err;
if (body < 0 || body > DE_SUN)
return DE_ERR_BODY;
/* Nutation and libration are not position queries */
if (body == DE_NUTATION || body == DE_LIBRATION)
return DE_ERR_BODY;
/* Body exists in the DE body table but may have no coefficients
* in this particular DE edition (e.g., Pluto omitted from some files).
* Returns DE_ERR_BODY since callers handle both "invalid index" and
* "not present" by falling back to VSOP87. */
{
de_body_layout *lay = &h->layout[body];
if (lay->offset <= 0 || lay->ncoeff <= 0 || lay->nsub <= 0)
return DE_ERR_BODY;
}
err = load_record(h, jd);
if (err != DE_OK)
return err;
pos[0] = interp_component(h, body, 0, jd);
pos[1] = interp_component(h, body, 1, jd);
pos[2] = interp_component(h, body, 2, jd);
/* Convert from km to AU */
pos[0] /= h->au_km;
pos[1] /= h->au_km;
pos[2] /= h->au_km;
return DE_OK;
}
/*
* Derive Earth position from Earth-Moon Barycenter and Moon.
*
* Earth = EMB - Moon * (1 / (1 + EMRAT))
*
* where EMRAT = M_earth / M_moon ~ 81.3.
* The Moon position in the DE file is geocentric, so:
* Earth = EMB - Moon / (1 + EMRAT)
*/
static int
get_earth_pos(de_handle *h, double jd, double pos[3])
{
double emb[3], moon[3];
double factor;
int err;
err = get_raw_pos(h, jd, DE_EMB, emb);
if (err != DE_OK)
return err;
err = get_raw_pos(h, jd, DE_MOON, moon);
if (err != DE_OK)
return err;
factor = 1.0 / (1.0 + h->emrat);
pos[0] = emb[0] - moon[0] * factor;
pos[1] = emb[1] - moon[1] * factor;
pos[2] = emb[2] - moon[2] * factor;
return DE_OK;
}
int
de_reader_get_pos(de_handle *h, double jd, int target, int center,
double pos[3])
{
double tpos[3], cpos[3];
int err;
if (!h)
return DE_ERR_OPEN;
/* Get target position */
if (target == DE_EMB)
{
/* EMB is stored directly */
err = get_raw_pos(h, jd, DE_EMB, tpos);
}
else if (target == DE_MOON && center != -1)
{
/*
* Moon in the file is geocentric. For Moon relative to
* something other than Earth, we need Earth + geocentric Moon.
*/
double earth[3], moon_geo[3];
err = get_earth_pos(h, jd, earth);
if (err != DE_OK)
return err;
err = get_raw_pos(h, jd, DE_MOON, moon_geo);
if (err != DE_OK)
return err;
/* Moon barycentric = Earth + geocentric_moon */
tpos[0] = earth[0] + moon_geo[0];
tpos[1] = earth[1] + moon_geo[1];
tpos[2] = earth[2] + moon_geo[2];
}
else if (target >= DE_MERCURY && target <= DE_PLUTO && target != DE_EMB)
{
/* Planets are stored directly as SSB-relative */
err = get_raw_pos(h, jd, target, tpos);
}
else if (target == DE_SUN)
{
err = get_raw_pos(h, jd, DE_SUN, tpos);
}
else
{
/* Raw mode for other body types */
err = get_raw_pos(h, jd, target, tpos);
}
if (err != DE_OK)
return err;
/* No center subtraction requested */
if (center < 0)
{
pos[0] = tpos[0];
pos[1] = tpos[1];
pos[2] = tpos[2];
return DE_OK;
}
/* Get center position */
if (center == DE_SUN)
{
err = get_raw_pos(h, jd, DE_SUN, cpos);
}
else if (center == DE_EMB)
{
err = get_raw_pos(h, jd, DE_EMB, cpos);
}
else if (center == 99)
{
/* Special code for "Earth" as center */
err = get_earth_pos(h, jd, cpos);
}
else if (center >= DE_MERCURY && center <= DE_PLUTO)
{
err = get_raw_pos(h, jd, center, cpos);
}
else
{
return DE_ERR_BODY;
}
if (err != DE_OK)
return err;
pos[0] = tpos[0] - cpos[0];
pos[1] = tpos[1] - cpos[1];
pos[2] = tpos[2] - cpos[2];
return DE_OK;
}
void
de_reader_close(de_handle *h)
{
if (!h)
return;
if (h->fd >= 0)
close(h->fd);
if (h->record_buf)
free(h->record_buf);
free(h);
}
double
de_reader_get_const(de_handle *h, const char *name)
{
/*
* Constant lookup requires reading constant names from record 1
* and values from record 2. For now, we expose the key values
* that are already parsed from the header.
*/
if (!h || !name)
return NAN;
if (strcmp(name, "AU") == 0)
return h->au_km;
if (strcmp(name, "EMRAT") == 0)
return h->emrat;
/* NAN sentinel: callers use isnan() to distinguish "not found"
* from a legitimate zero-valued constant. */
return NAN;
}