Private
Public Access
1
0
Files
rowsandall/scripts/export_courses.py

287 lines
9.1 KiB
Python

#!/usr/bin/env python
"""
export_courses.py

Bulk export of all GeoCourse records from the Rowsandall database to the
rownative course library JSON format.

Usage (run from the root of the Rowsandall project):

    python scripts/export_courses.py --output /path/to/rownative/courses

Or dry-run to check what would be exported without writing files:

    python scripts/export_courses.py --dry-run

Output:

    courses/
        {id}.json     one file per course
        index.json    flat index of all courses (no polygon detail)

All migrated courses are set to status "established".

Course authorship (manager) is intentionally not exported — personal data.
The submitted_by field is set to "migrated from Rowsandall" for all courses.

Dependencies: runs inside the Rowsandall Django project. No new packages needed.
"""
import argparse
import json
import logging
import os
import statistics
import sys
from collections import Counter
from pathlib import Path
# ---------------------------------------------------------------------------
# Django setup — must happen before any model imports
# ---------------------------------------------------------------------------
# Assumes the script is placed in rowsandall/scripts/ and run from
# the project root. Adjust DJANGO_SETTINGS_MODULE if needed.
# Make the project root importable so the settings module below resolves.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
# setdefault: an externally exported DJANGO_SETTINGS_MODULE wins over this default.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rowsandall_app.settings')
import django
django.setup()  # populates the app registry; model imports fail without it
# ---------------------------------------------------------------------------
# Model imports — after django.setup()
# ---------------------------------------------------------------------------
from rowers.models import GeoCourse, GeoPolygon, GeoPoint
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Timestamped, level-tagged console output for progress reporting during export.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%H:%M:%S',
)
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Core export logic
# ---------------------------------------------------------------------------
def export_course(course):
    """
    Convert a GeoCourse ORM object to the rownative JSON schema.

    Fetches the course's polygons (ordered by ``order_in_course``) and each
    polygon's points (ordered by ``order_in_poly``), then derives the course
    center as the per-axis median of the per-polygon centroids — matching
    course_coord_center() in models.py.

    Returns:
        A dict ready for json.dumps(). ``center_lat``/``center_lon`` are
        None when no polygon contains any points.
    """
    polygons = GeoPolygon.objects.filter(
        course=course
    ).order_by('order_in_course')

    polygon_list = []
    for polygon in polygons:
        points = GeoPoint.objects.filter(
            polygon=polygon
        ).order_by('order_in_poly')
        polygon_list.append({
            'name': polygon.name,
            'order': polygon.order_in_course,
            'points': [{'lat': p.latitude, 'lon': p.longitude} for p in points],
        })

    # Per-polygon centroids; empty polygons are skipped so they neither
    # divide by zero nor skew the center.
    centroid_lats = [
        sum(p['lat'] for p in poly['points']) / len(poly['points'])
        for poly in polygon_list if poly['points']
    ]
    centroid_lons = [
        sum(p['lon'] for p in poly['points']) / len(poly['points'])
        for poly in polygon_list if poly['points']
    ]
    # statistics.median averages the two middle values for even-length input,
    # exactly like the hand-rolled median this replaces.
    center_lat = statistics.median(centroid_lats) if centroid_lats else None
    center_lon = statistics.median(centroid_lons) if centroid_lons else None

    return {
        'id': str(course.id),
        'name': course.name,
        'country': course.country or '',
        'center_lat': round(center_lat, 6) if center_lat is not None else None,
        'center_lon': round(center_lon, 6) if center_lon is not None else None,
        'distance_m': course.distance or 0,
        'notes': course.notes or '',
        'status': 'established',
        'submitted_by': 'migrated from Rowsandall',
        'polygons': polygon_list,
    }
def export_all(output_dir, dry_run=False, ids=None):
    """
    Export all courses (or a subset by ID) to output_dir/courses/*.json
    and generate output_dir/courses/index.json.

    Args:
        output_dir: Directory under which the ``courses/`` folder is created.
        dry_run: When True, log what would be written without touching disk.
        ids: Optional list of course IDs to restrict the export to.

    Returns:
        Tuple ``(exported, skipped)`` counts.
    """
    courses_dir = Path(output_dir) / 'courses'
    if not dry_run:
        courses_dir.mkdir(parents=True, exist_ok=True)

    queryset = GeoCourse.objects.all().order_by('id')
    if ids:
        queryset = queryset.filter(id__in=ids)
    total = queryset.count()
    log.info(f'Found {total} courses to export')

    index = []
    skipped = 0
    exported = 0
    for course in queryset:
        try:
            data = export_course(course)
        except Exception as e:
            log.warning(f'Course {course.id} ({course.name!r}): export failed — {e}')
            skipped += 1
            continue

        # Skip courses with no polygons — they are unusable
        if not data['polygons']:
            log.warning(f'Course {course.id} ({course.name!r}): no polygons, skipping')
            skipped += 1
            continue

        # Skip courses where any polygon has fewer than 3 points
        bad_polygons = [
            p['name'] for p in data['polygons'] if len(p['points']) < 3
        ]
        if bad_polygons:
            log.warning(
                f'Course {course.id} ({course.name!r}): '
                f'polygon(s) with <3 points: {bad_polygons}, skipping'
            )
            skipped += 1
            continue

        filename = courses_dir / f'{course.id}.json'
        # One-line description reused by both the dry-run and real-write logs.
        summary = (
            f'{course.name!r} ({course.country}, {course.distance}m, '
            f'{len(data["polygons"])} polygons)'
        )
        if dry_run:
            log.info(f'[DRY RUN] Would write {filename}: {summary}')
        else:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            log.info(f'Wrote {filename}: {summary}')

        # Add to index (no polygon detail)
        index.append({
            'id': data['id'],
            'name': data['name'],
            'country': data['country'],
            'center_lat': data['center_lat'],
            'center_lon': data['center_lon'],
            'distance_m': data['distance_m'],
            'status': data['status'],
        })
        exported += 1

    # Write index
    index_path = courses_dir / 'index.json'
    if dry_run:
        log.info(f'[DRY RUN] Would write {index_path} with {len(index)} entries')
    else:
        with open(index_path, 'w', encoding='utf-8') as f:
            json.dump(index, f, indent=2, ensure_ascii=False)
        log.info(f'Wrote {index_path} with {len(index)} entries')

    log.info(f'Done. Exported: {exported}, skipped: {skipped}, total: {total}')
    return exported, skipped
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: parse arguments, run the export, and
    optionally print a per-country summary."""
    cli = argparse.ArgumentParser(
        description='Export Rowsandall courses to rownative JSON format.'
    )
    cli.add_argument(
        '--output',
        default='.',
        help=(
            'Output directory (default: current directory). '
            'Files are written to {output}/courses/*.json'
        ),
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Print what would be exported without writing any files.',
    )
    cli.add_argument(
        '--ids',
        nargs='+',
        type=int,
        metavar='ID',
        help='Export only specific course IDs (space-separated).',
    )
    cli.add_argument(
        '--summary',
        action='store_true',
        help='Print a summary of courses by country after export.',
    )
    opts = cli.parse_args()

    export_all(
        output_dir=opts.output,
        dry_run=opts.dry_run,
        ids=opts.ids,
    )

    # A country breakdown only makes sense when the index was actually written.
    if opts.summary and not opts.dry_run:
        print_summary(opts.output)
def print_summary(output_dir):
    """Print a breakdown of exported courses by country.

    Reads {output_dir}/courses/index.json (written by export_all) and prints
    one line per country, most frequent first, followed by a TOTAL line.
    Silently returns if the index file does not exist (e.g. after --dry-run).
    """
    index_path = Path(output_dir) / 'courses' / 'index.json'
    if not index_path.exists():
        return
    with open(index_path, encoding='utf-8') as f:
        index = json.load(f)

    # Falsy country values (empty string / null) are bucketed as 'unknown'
    # here, so keys are never falsy again below.
    counts = Counter(c['country'] or 'unknown' for c in index)

    print('\nExported courses by country:')
    # most_common() sorts by count descending with a stable tie order —
    # identical output to a manual sorted(..., key=lambda x: -x[1]).
    for country, count in counts.most_common():
        print(f' {country:30s} {count:4d}')
    print(f' {"TOTAL":30s} {len(index):4d}')
# Script entry point — only runs when executed directly, not on import.
if __name__ == '__main__':
    main()