Files
pupmap/scripts/overlay/clean_map_data.py
2025-10-03 22:48:16 +01:00

48 lines
1.2 KiB
Python

import geopandas as gpd
import pandas as pd
# --- 1. Clean Cities ---
# Read the European cities layer, normalise its POP_MAX attribute to an
# integer column, drop rows without a usable geometry, and write the result
# to a new GeoJSON file.
cities_path = "world_cities_europe.geojson"
cities_clean_path = "world_cities_europe_clean.geojson"

cities = gpd.read_file(cities_path)

# Guarantee an integer POP_MAX column.
if 'POP_MAX' in cities.columns:
    # Coerce before casting: a direct astype(int) raises on values that are
    # not parseable as numbers, whereas coercion turns them into NaN, which
    # is then replaced by 0 like any other missing value.
    cities['POP_MAX'] = (
        pd.to_numeric(cities['POP_MAX'], errors='coerce').fillna(0).astype(int)
    )
else:
    cities['POP_MAX'] = 0

# Remove rows without a usable geometry. `is_empty` reports False for
# missing (None) geometries, so missing values are checked explicitly too.
city_geometry = cities['geometry']
cities = cities[~(city_geometry.is_empty | city_geometry.isna())]

cities.to_file(cities_clean_path, driver="GeoJSON")
print(f"[+] Cleaned cities saved to {cities_clean_path}")
# --- 2. Clean Roads ---
# Read the European roads layer and keep only rows with a usable geometry.
roads_path = "roads_europe.geojson"
roads_clean_path = "roads_europe_clean.geojson"

roads = gpd.read_file(roads_path)

# Remove rows without a usable geometry. `is_empty` reports False for
# missing (None) geometries, so missing values are checked explicitly too.
# The explicit copy makes the filtered frame independent of the original, so
# adding columns to it later does not trigger pandas' SettingWithCopyWarning.
road_geometry = roads['geometry']
roads = roads[~(road_geometry.is_empty | road_geometry.isna())].copy()
# Add 'type' property based on 'fclass'
def classify_road(fclass):
    """Return 'major' for motorway/primary road classes, 'minor' otherwise.

    The value is converted with ``str()`` and lower-cased before the
    comparison, so matching is case-insensitive and non-string inputs
    (e.g. ``None`` or NaN) fall through to 'minor'.
    """
    normalized = str(fclass).lower()
    return 'major' if normalized in ('motorway', 'primary') else 'minor'
# Tag every road as 'major' or 'minor' via classify_road. When the layer has
# no 'fclass' column there is nothing to classify, so every road is 'minor'.
roads['type'] = (
    roads['fclass'].apply(classify_road)
    if 'fclass' in roads.columns
    else 'minor'
)

# Write the cleaned layer out as GeoJSON and report where it went.
roads.to_file(roads_clean_path, driver="GeoJSON")
print(f"[+] Cleaned roads saved to {roads_clean_path}")