"""Clean the European cities and roads GeoJSON layers.

Reads ``world_cities_europe.geojson`` and ``roads_europe.geojson`` from the
current working directory and writes ``*_clean.geojson`` copies in which:

* features whose geometry is missing or empty are removed,
* cities always carry an integer ``POP_MAX`` property,
* roads carry a ``type`` property that is either ``'major'`` or ``'minor'``.
"""

import geopandas as gpd
import pandas as pd

# --- Input / output locations (relative to the working directory) ---
cities_path = "world_cities_europe.geojson"
cities_clean_path = "world_cities_europe_clean.geojson"
roads_path = "roads_europe.geojson"
roads_clean_path = "roads_europe_clean.geojson"

# Lower-cased 'fclass' values that count as major roads.
_MAJOR_FCLASSES = frozenset({"motorway", "primary"})


def _drop_unusable_geometries(frame):
    """Return *frame* without rows whose geometry is missing or empty.

    ``is_empty`` alone does not flag missing (None) geometries, so both
    conditions are checked; otherwise null-geometry features would be written
    to the "clean" output.
    """
    geometry = frame["geometry"]
    return frame[~(geometry.isna() | geometry.is_empty)]


def classify_road(fclass):
    """Return ``'major'`` for motorway/primary roads, ``'minor'`` otherwise.

    The comparison is case-insensitive and is done on ``str(fclass)``, so
    non-string values (e.g. a missing value) fall through to ``'minor'``.
    """
    return "major" if str(fclass).lower() in _MAJOR_FCLASSES else "minor"


def clean_cities(src=cities_path, dst=cities_clean_path):
    """Normalise ``POP_MAX``, drop unusable geometries and write *dst*."""
    cities = gpd.read_file(src)

    # Ensure POP_MAX exists and is an integer column.
    if "POP_MAX" in cities.columns:
        # Coerce first: values that cannot be parsed as numbers become NaN
        # and are then treated like missing values (0) instead of making
        # astype(int) raise.
        population = pd.to_numeric(cities["POP_MAX"], errors="coerce")
        cities["POP_MAX"] = population.fillna(0).astype(int)
    else:
        cities["POP_MAX"] = 0

    cities = _drop_unusable_geometries(cities)
    cities.to_file(dst, driver="GeoJSON")
    print(f"[+] Cleaned cities saved to {dst}")


def clean_roads(src=roads_path, dst=roads_clean_path):
    """Drop unusable geometries, add the ``type`` property and write *dst*."""
    roads = _drop_unusable_geometries(gpd.read_file(src))

    # Add 'type' property based on 'fclass'.
    if "fclass" in roads.columns:
        roads["type"] = roads["fclass"].apply(classify_road)
    else:
        # If no fclass, mark everything as minor.
        roads["type"] = "minor"

    roads.to_file(dst, driver="GeoJSON")
    print(f"[+] Cleaned roads saved to {dst}")


def main():
    """Run both cleaning steps: cities first, then roads."""
    clean_cities()
    clean_roads()


if __name__ == "__main__":
    main()