pg_orrery/bench/build_catalog.py
Ryan Malloy db1f478e4f Update CLAUDE.md for v0.10.0, gitignore bench catalogs, add roadmap
- CLAUDE.md: 106 -> 114 functions, 18 -> 19 test suites, add aberration
  suite, DE apparent variants, equatorial spatial domain to tables
- .gitignore: ignore downloaded TLE catalogs in bench/ (alpha5, celestrak,
  satnogs, spacetrack, supgp, mega/merged, cookies)
- docs/TODO-v0.10.0.md: rewrite as post-v0.10.0 roadmap with next version
  candidates (make_orbital_elements, galilean_equatorial, equatorial GiST
  index, nutation, Delta T, rise/set)
- Track bench/build_catalog.py and agent thread message 001
2026-02-21 21:52:05 -07:00

174 lines
5.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Build a merged TLE catalog from multiple sources for pg_orrery benchmarks.
Usage:
# Merge existing TLE files into SQL
./build_catalog.py bench/spacetrack_everything.tle bench/celestrak_active.tle ...
# Pipe to psql
./build_catalog.py bench/*.tle | PGPORT=5499 psql -d contrib_regression
# Or generate SQL file
./build_catalog.py bench/*.tle > bench/load_catalog.sql
Deduplication: when the same NORAD ID appears in multiple files, the entry
with the newest epoch wins. This means CelesTrak SupGP data (fresher epochs)
automatically overrides stale Space-Track entries.
Alpha-5 NORAD IDs (T0002 etc.) are handled transparently — they parse into
integers >100,000 via the same logic as Bill Gray's get_el.c.
"""
import sys
import os
import re
from collections import OrderedDict
# Alpha-5 NORAD decoding — mirrors get_norad_number() in src/sgp4/get_el.c
_ALPHA5_SKIP = {'I', 'O'}  # skipped in Alpha-5 encoding
_ASCII_DIGITS = frozenset('0123456789')


def decode_norad(s):
    """Decode a 5-character NORAD field to integer. Handles Alpha-5.

    Two encodings are accepted after surrounding whitespace is stripped:

    * plain decimal digits, returned as-is (``'25544'`` -> ``25544``);
    * Alpha-5, one uppercase ASCII letter followed by exactly four digits.
      The letter stands for the leading two decimal digits, starting at
      ``A`` = 10 and skipping ``I`` and ``O`` (``'T0002'`` -> ``270002``).

    Args:
        s: The raw catalog-number field (typically ``line1[2:7]``).

    Returns:
        The catalog number as an int, or None when the field is empty or
        malformed.
    """
    s = s.strip()
    if not s:
        return None

    first, rest = s[0], s[1:]

    if first in _ASCII_DIGITS:
        # Validate explicitly instead of leaning on int(): int() also accepts
        # underscores ("1_000") and non-ASCII digits, which are not valid here.
        if not _ASCII_DIGITS.issuperset(rest):
            return None
        return int(s)

    if 'A' <= first <= 'Z' and first not in _ALPHA5_SKIP:
        # The tail must be exactly four digits.  Letting int() parse it would
        # accept signs and padding ("A-001" -> 99999, "A 123" -> 100123) and
        # silently yield an ID that belongs to a different object.
        if len(rest) != 4 or not _ASCII_DIGITS.issuperset(rest):
            return None
        val = ord(first) - ord('A')
        # Close the gaps left by the two skipped letters so J follows H and
        # P follows N.
        if first > 'I':
            val -= 1
        if first > 'O':
            val -= 1
        return val * 10000 + int(rest) + 100000

    # Lowercase, punctuation, non-ASCII letters, or a skipped letter (I/O).
    return None
def _epoch_sort_key(epoch):
    """Return a key that orders raw TLE epochs (YYDDD.dddddddd) chronologically.

    TLE epochs carry a two-digit year: 57-99 denote 1957-1999 and 00-56
    denote 2000-2056.  Compared as plain floats, every 19xx epoch would rank
    above every 20xx epoch, so 20xx values are shifted up by one century.
    The 0.0 "unparseable" sentinel is left alone and therefore ranks lowest.
    """
    if 0.0 < epoch < 57000.0:
        return epoch + 100000.0
    return epoch


def parse_3le_file(filepath):
    """Parse a 3LE (or 2LE) file into a dict of norad_str -> (line1, line2, name, epoch).

    A record is a line starting with ``'1 '`` immediately followed by a line
    starting with ``'2 '``.  If the line just before it is non-empty and is
    not itself an element line, it is taken as the object name.

    Args:
        filepath: Path of the TLE file to read.

    Returns:
        Dict keyed by the decoded NORAD number rendered as a decimal string.
        Each value is ``(line1, line2, name, epoch)``: ``name`` is ``''``
        when no name line precedes the record, and ``epoch`` is the raw
        ``YYDDD.dddddddd`` float from line 1 (0.0 when it cannot be parsed).
        When a NORAD number occurs more than once, the chronologically newest
        record is kept.  A missing file is reported on stderr and yields an
        empty dict.
    """
    objects = {}
    try:
        # Context manager so the handle is closed promptly even when many
        # catalog files are processed in one run.
        with open(filepath, errors='replace') as handle:
            lines = [raw.rstrip('\r\n') for raw in handle]
    except FileNotFoundError:
        print(f"# SKIP {filepath}: not found", file=sys.stderr)
        return objects

    total = len(lines)
    i = 0
    while i < total:
        line1 = lines[i]
        is_record = (
            line1.startswith('1 ')
            and i + 1 < total
            and lines[i + 1].startswith('2 ')
        )
        if not is_record:
            i += 1
            continue
        line2 = lines[i + 1]

        # Look back for name line (3LE format).
        # NOTE(review): the name line is kept verbatim apart from whitespace;
        # if a source prefixes name lines with a marker such as "0 ", that
        # marker ends up in the name — confirm whether it should be stripped.
        name = ''
        if i > 0:
            prev = lines[i - 1]
            if prev and not prev.startswith(('1 ', '2 ')):
                name = prev.strip()

        # Both lines of the record are consumed whether or not it is usable.
        i += 2

        # Extract NORAD ID (works for both standard and Alpha-5).
        norad_int = decode_norad(line1[2:7])
        if norad_int is None:
            continue
        norad_str = str(norad_int)

        # Epoch occupies line1[18:32]; slicing never raises, so only a
        # failed float conversion needs handling.
        try:
            epoch = float(line1[18:32].strip())
        except ValueError:
            epoch = 0.0

        # Keep the entry with the newest epoch, comparing chronologically
        # rather than by the raw two-digit-year value.
        existing = objects.get(norad_str)
        if existing is None or _epoch_sort_key(epoch) > _epoch_sort_key(existing[3]):
            objects[norad_str] = (line1, line2, name, epoch)
    return objects
def _sql_escape(text):
    """Escape *text* for use inside a PostgreSQL ``E'...'`` string literal.

    Backslashes are doubled and single quotes are doubled, which is what an
    escape-string literal requires regardless of the server's
    ``standard_conforming_strings`` setting.
    """
    return text.replace('\\', '\\\\').replace("'", "''")


def main():
    """Merge the TLE files named on the command line and print SQL to stdout.

    Command line: ``[--table NAME | --table=NAME] FILE [FILE ...]``.  The
    table name defaults to ``bench_catalog``.  Per-file and total statistics
    go to stderr; stdout receives a DROP/CREATE TABLE followed by one INSERT
    per unique NORAD ID, ordered by numeric ID.

    Exits with status 1 when called without arguments or when ``--table`` is
    given without a usable name.
    """
    if len(sys.argv) < 2:
        print(__doc__, file=sys.stderr)
        sys.exit(1)

    # Parse the optional --table option; every other argument is an input file.
    table_name = 'bench_catalog'
    files = []
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '--table':
            if i + 1 >= len(args):
                # Previously a trailing --table was silently treated as a
                # (missing) input file named "--table".
                print("--table requires a table name", file=sys.stderr)
                sys.exit(1)
            table_name = args[i + 1]
            i += 2
        elif arg.startswith('--table='):
            table_name = arg.split('=', 1)[1]
            i += 1
        else:
            files.append(arg)
            i += 1
    if not table_name:
        print("--table requires a non-empty table name", file=sys.stderr)
        sys.exit(1)

    def chronological(epoch):
        # Raw TLE epochs are YYDDD.ddd with a two-digit year (57-99 = 19xx,
        # 00-56 = 20xx).  Shift 20xx epochs up a century so "newer" really
        # means later in time; 0.0 (unparseable) stays lowest.
        if 0.0 < epoch < 57000.0:
            return epoch + 100000.0
        return epoch

    # Merge all sources: for a NORAD ID seen before, the entry is replaced
    # only when the incoming epoch is strictly newer.
    mega = {}
    for filepath in files:
        objs = parse_3le_file(filepath)
        new = updated = 0
        for norad, entry in objs.items():
            current = mega.get(norad)
            if current is None:
                new += 1
                mega[norad] = entry
            elif chronological(entry[3]) > chronological(current[3]):
                updated += 1
                mega[norad] = entry
        basename = os.path.basename(filepath)
        print(f"-- {basename}: {len(objs)} objects ({new} new, {updated} updated)", file=sys.stderr)
    print(f"-- Total: {len(mega)} unique objects", file=sys.stderr)

    # Emit SQL.
    # NOTE: table_name is interpolated verbatim (unquoted), so it must come
    # from a trusted operator.
    print(f"-- pg_orrery benchmark catalog ({len(mega)} objects)")
    print(f"-- Generated from {len(files)} TLE source files")
    print(f"-- Sources: {', '.join(os.path.basename(f) for f in files)}")
    print()
    print(f"DROP TABLE IF EXISTS {table_name};")
    print(f"CREATE TABLE {table_name} (")
    print(" id serial,")
    print(" name text,")
    print(" tle tle")
    print(");")
    print()

    count = 0
    for norad_str in sorted(mega, key=int):
        line1, line2, name, _epoch = mega[norad_str]
        if not name:
            name = f'NORAD {norad_str}'
        # Both values are emitted as E'' literals with identical escaping.
        # The old code doubled backslashes in a plain '...' literal (storing
        # them doubled) and did not escape the TLE lines at all.
        name_sql = _sql_escape(name)
        # The "\n" between the lines is a literal backslash-n that the E''
        # literal turns into a newline; each line is escaped separately so
        # that separator is not itself doubled.
        tle_sql = f"{_sql_escape(line1)}\\n{_sql_escape(line2)}"
        print(f"INSERT INTO {table_name} (name, tle) VALUES (E'{name_sql}', E'{tle_sql}');")
        count += 1
    print()
    print(f"-- Loaded {count} objects")


if __name__ == '__main__':
    main()