Commit 9daddb15 authored by Jonathan Minz's avatar Jonathan Minz
Browse files

Created a converter from .hpl files to obs4MIPs-compliant .nc files

parent 0841f649
Loading
Loading
Loading
Loading
+13043 −0

File added.

Preview size limit exceeded, changes collapsed.

+158 −0
Original line number Diff line number Diff line
import numpy as np
import xarray as xr
from datetime import datetime, timedelta
import re
import json

# === Input files ===
# Raw Halo Photonics Doppler-lidar scan to convert.
file_path = "Stare_115_20240423_00.hpl"

# --- obs4MIPs-compliant global attributes, maintained in a JSON sidecar ---
with open("obs4mips_attributes.json", "r") as attrs_file:
    json_attrs = json.load(attrs_file)

# === Step 1: Read the whole .hpl file into memory ===
# errors='ignore' drops any undecodable bytes the instrument firmware
# may have written into the header.
with open(file_path, 'r', encoding='utf-8', errors='ignore') as hpl_file:
    lines = hpl_file.readlines()

# === Step 2: Parse header ===
# The header is a run of "key: value" lines terminated by a line of "****";
# the data records begin on the line after the terminator.
header = {}
i = 0
while i < len(lines):
    line = lines[i].strip()
    if line == "****":
        i += 1  # leave `i` pointing at the first data line for Step 4
        break
    if ':' in line:
        key, value = line.split(':', 1)
        header[key.strip()] = value.strip()
    i += 1

# Key metadata required to decode the data blocks below.
num_gates = int(header.get("Number of gates", 0))
gate_length = float(header.get("Range gate length (m)", 0))
start_time_str = header.get("Start time", "")
# Only the date part of "Start time" is used; each ray carries its own
# decimal-hour timestamp relative to this base date.
base_date = datetime.strptime(start_time_str.split()[0], "%Y%m%d")

# Preserve the raw instrument header as a single string attribute so the
# original metadata survives the conversion to NetCDF.
# (Removed the unused `header_json_safe` local that duplicated this.)
header_str = "\n".join(f"{k}: {v}" for k, v in header.items())
json_attrs["original_hpl_header"] = header_str

# === Step 3: Prepare storage arrays ===
doppler = []     # per-ray lists of radial velocity, one list per ray
intensity = []   # per-ray lists of SNR+1
beta = []        # per-ray lists of backscatter coefficient
times = []       # Unix timestamps, one per ray
azimuths = []
elevations = []

# === Step 4: Parse data blocks ===
# Each ray is one metadata line (decimal hour, azimuth, elevation, ...)
# followed by `num_gates` gate lines (gate index, doppler, intensity, beta).
while i < len(lines):
    ray_line = lines[i].strip()
    if not ray_line:
        i += 1
        continue

    parts = ray_line.split()
    if len(parts) < 3:
        # Trailing junk / malformed ray header: skip instead of raising
        # IndexError on the missing fields.
        i += 1
        continue
    decimal_hour = float(parts[0])
    azimuth = float(parts[1])
    elevation = float(parts[2])

    # Decimal hours since the file's base date -> absolute timestamp,
    # then seconds since the Unix epoch (naive, assumed UTC).
    ray_time = base_date + timedelta(hours=decimal_hour)
    unix_time = (ray_time - datetime(1970, 1, 1)).total_seconds()

    # Advance to the gate lines of this ray.
    i += 1
    ray_doppler = []
    ray_intensity = []
    ray_beta = []

    for _ in range(num_gates):
        if i >= len(lines):
            break  # truncated file: stop instead of raising IndexError
        gate_parts = lines[i].strip().split()
        i += 1
        if len(gate_parts) < 4:
            # Malformed gate line: record fill values so every ray keeps
            # exactly num_gates entries. (Previously the entry was simply
            # skipped, yielding ragged lists and an object-dtype np.array.)
            ray_doppler.append(np.nan)
            ray_intensity.append(np.nan)
            ray_beta.append(np.nan)
            continue
        ray_doppler.append(float(gate_parts[1]))
        ray_intensity.append(float(gate_parts[2]))
        ray_beta.append(float(gate_parts[3]))

    # Pad a ray cut short by end-of-file so the arrays stay rectangular.
    missing = num_gates - len(ray_doppler)
    if missing:
        ray_doppler.extend([np.nan] * missing)
        ray_intensity.extend([np.nan] * missing)
        ray_beta.extend([np.nan] * missing)

    # Append ray data
    times.append(unix_time)
    azimuths.append(azimuth)
    elevations.append(elevation)
    doppler.append(ray_doppler)
    intensity.append(ray_intensity)
    beta.append(ray_beta)

# === Step 5: Build coordinate arrays and sanitize header attributes ===
time = np.array(times)
# Range coordinate at the center of each gate.
range_gates = np.arange(num_gates) * gate_length + gate_length / 2


def _netcdf_safe(raw_key):
    """Return *raw_key* rewritten as a legal NetCDF attribute name.

    Illegal characters become underscores, and a leading digit gets an
    ``attr_`` prefix.
    """
    safe = re.sub(r"[^a-zA-Z0-9_]", "_", raw_key.strip())
    if re.match(r"^[0-9]", safe):
        safe = "attr_" + safe
    return safe


# Header keys come straight from the instrument and may contain characters
# NetCDF rejects; values are kept verbatim.
cleaned_attrs = {_netcdf_safe(k): v for k, v in header.items()}

# --- CF-style metadata for each data variable in the output dataset ---
var_attrs = dict(
    doppler=dict(
        units="m/s",
        long_name="Radial Doppler velocity",
        standard_name="radial_velocity_of_scatterers_away_from_instrument",
    ),
    intensity=dict(
        units="unitless",
        long_name="Signal-to-noise ratio plus one",
    ),
    beta=dict(
        units="m-1 sr-1",
        long_name="Backscatter coefficient",
    ),
)

# === Step 6: Assemble the Dataset and write the NetCDF file ===
ds = xr.Dataset(
    {
        "doppler": (["time", "range"], np.array(doppler)),
        "intensity": (["time", "range"], np.array(intensity)),
        "beta": (["time", "range"], np.array(beta)),
    },
    coords={
        "time": time,
        "range": range_gates,
        "azimuth": ("time", azimuths),
        "elevation": ("time", elevations),
    },
    attrs={
        **cleaned_attrs,
        "time_units": "seconds since 1970-01-01 00:00:00 UTC",
    },
)

# Attach per-variable CF metadata.
for var_name, attrs in var_attrs.items():
    ds[var_name].attrs.update(attrs)

# Merge the obs4MIPs global attributes on top of the header-derived ones.
# (Previously `ds.attrs = json_attrs` replaced the whole dict, silently
# discarding cleaned_attrs and "time_units" set in the constructor.)
ds.attrs.update(json_attrs)

# Save to NetCDF
output_path = "hpl_doppler_lidar_output.nc"
ds.info()  # `print(ds.info)` only printed the bound-method repr
ds.to_netcdf(output_path)
print(f"Saved to {output_path}")  # message now matches the real filename
+321 KiB

File added.

No diff preview for this file type.

+28 −0
Original line number Diff line number Diff line
{
    "activity_id": "obs4MIPs",
    "contact": "example@institute.org",
    "Conventions": "CF-1.11 ODS-2.5",
    "creation_date": "2025-07-15T16:00:00Z",
    "dataset_contributor": "John Doe",
    "data_specs_version": "2.5",
    "frequency": "mon",
    "grid": "1x1 degree latitude x longitude",
    "grid_label": "gn",
    "institution": "Example Institute for Climate Studies",
    "institution_id": "EICS",
    "nominal_resolution": "100 km",
    "processing_code_location": "https://github.com/example/obs4mips-processing-code",
    "product": "observations",
    "realm": "atmos",
    "region": "global",
    "source": "ExampleSource v1.0 (2025): Example observational dataset",
    "source_id": "ExampleSource-1-0",
    "source_label": "ExampleSource",
    "source_type": "satellite_blended",
    "source_version_number": "1.0",
    "variable_id": "tas",
    "variant_label": "EICS-BE",
    "license": "Data in this file produced by Example Institute for Climate Studies are licensed under a Creative Commons Attribution 4.0 International License (CC BY 4.0).",
    "references": "doi:10.1234/example.doi",
    "tracking_id": "hdl:21.14102/unique-uuid-1234"
}