import hashlib
import json
from pathlib import Path

from shapely.geometry import shape
from shapely.ops import unary_union
with open(f"../data/metadata_pastis.geojson", "r") as f:
    geojson = json.load(f)
tile_metadata = {}

for feature in geojson["features"]:
    props = feature["properties"]
    tile = props["TILE"]
    geom = shape(feature["geometry"])

    if tile not in tile_metadata:
        tile_metadata[tile] = {
            "geometries": [],
            "dates": {
                "S1A": set(),
                "S1D": set(),
                "S2": set()
            }
        }

    tile_metadata[tile]["geometries"].append(geom)

    for sensor in ["S1A", "S1D", "S2"]:
        date_dict = props.get(f"dates-{sensor}", {})
        tile_metadata[tile]["dates"][sensor].update(date_dict.values())

# Union each tile's patch footprints to derive its overall bounding box.
for tile, data in tile_metadata.items():
    unified_geom = unary_union(data["geometries"])
    bbox = unified_geom.bounds  # (minx, miny, maxx, maxy)
    tile_metadata[tile]["bounding_box"] = {
        "minx": bbox[0],
        "miny": bbox[1],
        "maxx": bbox[2],
        "maxy": bbox[3]
    }

    print(f"\nTile: {tile}")
    print(f"  Bounding Box: {tile_metadata[tile]['bounding_box']}")

    # Collapse each sensor's date set into first/last acquisition and count.
    for sensor, dates in data["dates"].items():
        dates = sorted(map(int, dates))
        if dates:
            coverage = {
                "start": dates[0],
                "end": dates[-1],
                "count": len(dates)
            }
        else:
            coverage = {
                "start": None,
                "end": None,
                "count": 0
            }

        tile_metadata[tile]["dates"][sensor] = coverage
        print(f"  {sensor} coverage: {coverage}")

Tile: t30uxv
  Bounding Box: {'minx': 363699.72588260216, 'miny': 6842288.5427378975, 'maxx': 471523.17614071444, 'maxy': 6958087.14156515}
  S1A coverage: {'start': 20181004, 'end': 20191204, 'count': 65}
  S1D coverage: {'start': 20181002, 'end': 20191202, 'count': 70}
  S2 coverage: {'start': 20180924, 'end': 20191019, 'count': 43}

Tile: t31tfj
  Bounding Box: {'minx': 802518.4751780948, 'miny': 6244474.937699021, 'maxx': 908891.7850479226, 'maxy': 6349417.542317636}
  S1A coverage: {'start': 20181001, 'end': 20191201, 'count': 71}
  S1D coverage: {'start': 20181006, 'end': 20191130, 'count': 70}
  S2 coverage: {'start': 20180917, 'end': 20191027, 'count': 61}

Tile: t31tfm
  Bounding Box: {'minx': 801218.5987733917, 'miny': 6541675.540056112, 'maxx': 908655.9347564826, 'maxy': 6649123.634602138}
  S1A coverage: {'start': 20181001, 'end': 20191201, 'count': 71}
  S1D coverage: {'start': 20181005, 'end': 20191129, 'count': 69}
  S2 coverage: {'start': 20180920, 'end': 20191025, 'count': 46}

Tile: t32ulu
  Bounding Box: {'minx': 941951.9043822229, 'miny': 6743041.037451558, 'maxx': 1051200.956177081, 'maxy': 6859518.94950735}
  S1A coverage: {'start': 20181002, 'end': 20191202, 'count': 71}
  S1D coverage: {'start': 20181006, 'end': 20191130, 'count': 70}
  S2 coverage: {'start': 20180917, 'end': 20191012, 'count': 38}
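
The acquisition dates are stored as plain YYYYMMDD integers, which sort correctly but are awkward for date arithmetic. If downstream code needs real dates (for example to measure the revisit span), a small helper along the following lines should work; the helper name and the example tile are illustrative, not part of the pipeline above.

from datetime import datetime

def yyyymmdd_to_date(value):
    # Parse a YYYYMMDD integer such as 20181004 into a datetime.date.
    return datetime.strptime(str(value), "%Y%m%d").date()

# Example: length of the S2 coverage window for one tile, using the
# coverage summaries ({"start": ..., "end": ..., "count": ...}) built above.
s2 = tile_metadata["t30uxv"]["dates"]["S2"]
span_days = (yyyymmdd_to_date(s2["end"]) - yyyymmdd_to_date(s2["start"])).days
print(f"S2 span for t30uxv: {span_days} days over {s2['count']} acquisitions")
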
def compute_tar_digest(path):
    """Stream a tar file in 8 KiB chunks and return its SHA-256 as an OCI-style digest string."""
    hash_fn = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(8192):
            hash_fn.update(chunk)
    return f"sha256:{hash_fn.hexdigest()}"

tar_dir = Path("../data/pastis")
layers = []

# Build one layer entry per tile, pairing the tar's content digest with its metadata.
for tile_name in sorted(tile_metadata.keys()):
    tar_path = tar_dir / f"{tile_name}.tar"
    if not tar_path.exists():
        print(f"⚠️  Skipping {tile_name}: tar file not found at {tar_path}")
        continue

    print(f"Processing {tar_path.name}...")
    digest = compute_tar_digest(tar_path)
    meta = tile_metadata[tile_name]

    layer = {
        "tile": tile_name,
        "bounding_box": meta.get("bounding_box"),
        "temporal_coverage": meta.get("dates"),
        "layer_digest": digest,
        "layer_mediaType": "application/vnd.oci.image.layer.v1.tar"
    }
    layers.append(layer)

# OCI-style config describing the dataset and its per-tile layers.
config = {
    "schemaVersion": 2,
    "mediaType": "application/vnd.oci.image.config.v1+json",
    "artifactType": "application/vnd.whatever.v1+tar",
    "dataset": "PASTIS-HD",
    "created_by": "IGN",
    "license": "etalab-2.0",
    "tiles": layers
}

file_path = tar_dir / "config-t4.json"
with open(file_path, "w") as f:
    json.dump(config, f, indent=2)

print(f"Config written at {Path(file_path).resolve()})")
Processing t30uxv.tar...
Processing t31tfj.tar...
Processing t31tfm.tar...
Processing t32ulu.tar...
Config written at /mounts/blobs/pastis/config-t4.json
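
As a final sanity check, the config can be read back and each recorded digest recomputed against the corresponding tar; this only reuses compute_tar_digest and the paths defined above, so it is a sketch of how a consumer might validate the archives rather than part of the build itself.

# Re-read the written config and verify every tar against its recorded digest.
with open(tar_dir / "config-t4.json", "r") as f:
    written = json.load(f)

for layer in written["tiles"]:
    tar_path = tar_dir / f"{layer['tile']}.tar"
    ok = compute_tar_digest(tar_path) == layer["layer_digest"]
    print(f"{tar_path.name}: {'OK' if ok else 'DIGEST MISMATCH'}")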