Private
Public Access
1
0
Files
rowsandall/scripts/export_courses.py

287 lines
9.1 KiB
Python

#!/usr/bin/env python
"""
export_courses.py

Bulk export of all GeoCourse records from the Rowsandall database to the
rownative course library JSON format.

Usage (run from the root of the Rowsandall project):

    python scripts/export_courses.py --output /path/to/rownative/courses

Or dry-run to check what would be exported without writing files:

    python scripts/export_courses.py --dry-run

Output:

    courses/
        {id}.json     one file per course
        index.json    flat index of all courses (no polygon detail)

All migrated courses are set to status "established".

Course authorship (manager) is intentionally not exported — personal data.
The submitted_by field is set to "migrated from Rowsandall" for all courses.

Dependencies: runs inside the Rowsandall Django project. No new packages needed.
"""
import argparse
import json
import logging
import os
import statistics
import sys
from collections import Counter
from pathlib import Path
# ---------------------------------------------------------------------------
# Django setup — must happen before any model imports
# ---------------------------------------------------------------------------
# Assumes the script is placed in rowsandall/scripts/ and run from
# the project root. Adjust DJANGO_SETTINGS_MODULE if needed.
# Make the project root importable so the settings module below resolves.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
# setdefault: an externally exported DJANGO_SETTINGS_MODULE wins over this default.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rowsandall_app.settings')
import django
django.setup()  # populates the app registry; model imports fail without it
# ---------------------------------------------------------------------------
# Model imports — after django.setup()
# ---------------------------------------------------------------------------
from rowers.models import GeoCourse, GeoPolygon, GeoPoint
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Timestamped, level-tagged console output for progress reporting during export.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%H:%M:%S',
)
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Core export logic
# ---------------------------------------------------------------------------
def export_course(course):
    """
    Convert a GeoCourse ORM object to the rownative JSON schema.

    Fetches the course's polygons (ordered by ``order_in_course``) and each
    polygon's points (ordered by ``order_in_poly``), then derives the course
    center as the per-axis median of the per-polygon centroids — matching
    course_coord_center() in models.py.

    Returns:
        A dict ready for json.dumps(). ``center_lat``/``center_lon`` are
        None when no polygon contains any points.
    """
    polygons = GeoPolygon.objects.filter(
        course=course
    ).order_by('order_in_course')

    polygon_list = []
    for polygon in polygons:
        points = GeoPoint.objects.filter(
            polygon=polygon
        ).order_by('order_in_poly')
        polygon_list.append({
            'name': polygon.name,
            'order': polygon.order_in_course,
            'points': [{'lat': p.latitude, 'lon': p.longitude} for p in points],
        })

    # Per-polygon centroids; empty polygons are skipped so they neither
    # divide by zero nor skew the center.
    centroid_lats = [
        sum(p['lat'] for p in poly['points']) / len(poly['points'])
        for poly in polygon_list if poly['points']
    ]
    centroid_lons = [
        sum(p['lon'] for p in poly['points']) / len(poly['points'])
        for poly in polygon_list if poly['points']
    ]
    # statistics.median averages the two middle values for even-length input,
    # exactly like the hand-rolled median this replaces.
    center_lat = statistics.median(centroid_lats) if centroid_lats else None
    center_lon = statistics.median(centroid_lons) if centroid_lons else None

    return {
        'id': str(course.id),
        'name': course.name,
        'country': course.country or '',
        'center_lat': round(center_lat, 6) if center_lat is not None else None,
        'center_lon': round(center_lon, 6) if center_lon is not None else None,
        'distance_m': course.distance or 0,
        'notes': course.notes or '',
        'status': 'established',
        'submitted_by': 'migrated from Rowsandall',
        'polygons': polygon_list,
    }
def export_all(output_dir, dry_run=False, ids=None):
    """
    Export all courses (or a subset by ID) to output_dir/courses/*.json
    and generate output_dir/courses/index.json.

    Args:
        output_dir: Directory under which the ``courses/`` folder is created.
        dry_run: When True, log what would be written without touching disk.
        ids: Optional list of course IDs to restrict the export to.

    Returns:
        Tuple ``(exported, skipped)`` counts.
    """
    courses_dir = Path(output_dir) / 'courses'
    if not dry_run:
        courses_dir.mkdir(parents=True, exist_ok=True)

    queryset = GeoCourse.objects.all().order_by('id')
    if ids:
        queryset = queryset.filter(id__in=ids)
    total = queryset.count()
    log.info(f'Found {total} courses to export')

    index = []
    skipped = 0
    exported = 0
    for course in queryset:
        try:
            data = export_course(course)
        except Exception as e:
            log.warning(f'Course {course.id} ({course.name!r}): export failed — {e}')
            skipped += 1
            continue

        # Skip courses with no polygons — they are unusable
        if not data['polygons']:
            log.warning(f'Course {course.id} ({course.name!r}): no polygons, skipping')
            skipped += 1
            continue

        # Skip courses where any polygon has fewer than 3 points
        bad_polygons = [
            p['name'] for p in data['polygons'] if len(p['points']) < 3
        ]
        if bad_polygons:
            log.warning(
                f'Course {course.id} ({course.name!r}): '
                f'polygon(s) with <3 points: {bad_polygons}, skipping'
            )
            skipped += 1
            continue

        filename = courses_dir / f'{course.id}.json'
        # One-line description reused by both the dry-run and real-write logs.
        summary = (
            f'{course.name!r} ({course.country}, {course.distance}m, '
            f'{len(data["polygons"])} polygons)'
        )
        if dry_run:
            log.info(f'[DRY RUN] Would write {filename}: {summary}')
        else:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            log.info(f'Wrote {filename}: {summary}')

        # Add to index (no polygon detail)
        index.append({
            'id': data['id'],
            'name': data['name'],
            'country': data['country'],
            'center_lat': data['center_lat'],
            'center_lon': data['center_lon'],
            'distance_m': data['distance_m'],
            'status': data['status'],
        })
        exported += 1

    # Write index
    index_path = courses_dir / 'index.json'
    if dry_run:
        log.info(f'[DRY RUN] Would write {index_path} with {len(index)} entries')
    else:
        with open(index_path, 'w', encoding='utf-8') as f:
            json.dump(index, f, indent=2, ensure_ascii=False)
        log.info(f'Wrote {index_path} with {len(index)} entries')

    log.info(f'Done. Exported: {exported}, skipped: {skipped}, total: {total}')
    return exported, skipped
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: parse arguments, run the export, and
    optionally print a per-country summary."""
    cli = argparse.ArgumentParser(
        description='Export Rowsandall courses to rownative JSON format.'
    )
    cli.add_argument(
        '--output',
        default='.',
        help=(
            'Output directory (default: current directory). '
            'Files are written to {output}/courses/*.json'
        ),
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Print what would be exported without writing any files.',
    )
    cli.add_argument(
        '--ids',
        nargs='+',
        type=int,
        metavar='ID',
        help='Export only specific course IDs (space-separated).',
    )
    cli.add_argument(
        '--summary',
        action='store_true',
        help='Print a summary of courses by country after export.',
    )
    opts = cli.parse_args()

    export_all(
        output_dir=opts.output,
        dry_run=opts.dry_run,
        ids=opts.ids,
    )

    # A country breakdown only makes sense when the index was actually written.
    if opts.summary and not opts.dry_run:
        print_summary(opts.output)
def print_summary(output_dir):
    """Print a breakdown of exported courses by country.

    Reads {output_dir}/courses/index.json (written by export_all) and prints
    one line per country, most frequent first, followed by a TOTAL line.
    Silently returns if the index file does not exist (e.g. after --dry-run).
    """
    index_path = Path(output_dir) / 'courses' / 'index.json'
    if not index_path.exists():
        return
    with open(index_path, encoding='utf-8') as f:
        index = json.load(f)

    # Falsy country values (empty string / null) are bucketed as 'unknown'
    # here, so keys are never falsy again below.
    counts = Counter(c['country'] or 'unknown' for c in index)

    print('\nExported courses by country:')
    # most_common() sorts by count descending with a stable tie order —
    # identical output to a manual sorted(..., key=lambda x: -x[1]).
    for country, count in counts.most_common():
        print(f' {country:30s} {count:4d}')
    print(f' {"TOTAL":30s} {len(index):4d}')
# Script entry point — only runs when executed directly, not on import.
if __name__ == '__main__':
    main()