Exercise 1 You will make two maps:
- Plot your country as the background. Use cx to plot some points (the cities, airports, etc.) below the centroid.
- Plot your country as the background. Select with cx all the first administrative divisions above the centroid. Then, use clip to show some lines (rivers, railroads, etc) that cross those divisions.
#!pip install geopandas
#!pip install fiona
import geopandas as gpd
from fiona import listlayers
#countries=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/maps/World_Countries/World_Countries.shp")
#cities=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/maps/World_Cities/World_Cities.shp")
#regiones=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/maps/Regiones/Regional.shp")
#airports=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/data/chile-airports.csv")
#ferrovias=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/maps/Red_Ferroviaria/Red_ferroviaria.shp")
chile=countries[countries.COUNTRY=="Chile"]
chile_5361=chile.to_crs(5361)
#asignaremos un crs conocido para luego poroyectar
# Asegurar que ambos estén en el mismo CRS antes del clip
ferrovias_aligned = ferrovias.to_crs(chile.crs)
# Realizar el clip
ferroviasChile_clipped = gpd.clip(ferrovias_aligned, chile)
# Luego proyectar al CRS deseado (por ejemplo EPSG:5361)
ferrovias_chile_5361 = ferroviasChile_clipped.to_crs(5361)
airports=gpd.GeoDataFrame(data=airports.copy(),
geometry=gpd.points_from_xy(airports.longitude_deg,
airports.latitude_deg),
crs=chile.crs.to_epsg())
airports_5361=airports.to_crs(5361)
regiones.crs
<Projected CRS: EPSG:3857> Name: WGS 84 / Pseudo-Mercator Axis Info [cartesian]: - X[east]: Easting (metre) - Y[north]: Northing (metre) Area of Use: - name: World between 85.06°S and 85.06°N. - bounds: (-180.0, -85.06, 180.0, 85.06) Coordinate Operation: - name: Popular Visualisation Pseudo-Mercator - method: Popular Visualisation Pseudo Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
regiones.crs = "EPSG:3857"
regiones=regiones.to_crs(5361)
chile_centroid=chile.to_crs(5361).centroid
cities_chile_5361=cities[cities.COUNTRY=='Chile'].to_crs(5361)
#centroide
centroidX,centroidY=chile_5361.centroid.x.values[0],chile_5361.centroid.y.values[0]
#the arriba
regiones_A=regiones.cx[:,centroidY:]
cities_chile_5361_A=cities_chile_5361.cx[:,centroidY:]
airports_5361_A=airports_5361.cx[:,centroidY:]
#the abajo
regiones_B=regiones.cx[:,:centroidY]
cities_chile_5361_B=cities_chile_5361.cx[:,:centroidY]
airports_5361_B=airports_5361.cx[:,:centroidY]
base=chile_5361.plot(facecolor='lightblue',edgecolor='black',linewidth=0.4,figsize=(5,5))
regiones_A.plot(marker='.',color='blue',markersize=13,ax=base)
<Axes: >
regiones_A.plot(facecolor='lightblue',edgecolor='black',linewidth=0.4,figsize=(5,5))
<Axes: >
base = chile_5361.plot(facecolor='lightgreen', edgecolor='black', figsize=(10, 10))
airports_5361_B.plot(marker='.', color='red', markersize=5, ax=base)
<Axes: >
base=chile_5361.plot(facecolor='lightblue',edgecolor='black',linewidth=0.4,figsize=(5,5))
regiones_A.plot(facecolor='lightblue',edgecolor='black',linewidth=1,ax=base)
ferrovias_chile_5361.plot(edgecolor="purple",linewidth=0.5,ax=base)
<Axes: >
Exercise 2
1.Create some subset of polygons with your country data at the municipal (or similar level). Use Unary UNION with those polygons, and create a geoDF with the result. 2.Dissolve your municipalities by another higher level administrative level. Plot the result. 3.If possible, color some areas of your country by aggregating; if not, plot the "median" values in the indicators map.
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from shapely.ops import unary_union
comunas_gov=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/maps/Comunas/comunas.shp")
comunas_gov = comunas_gov.set_crs(epsg=3857, allow_override=True)
comunas_gov_clipped=gpd.clip(comunas_gov,chile)
comunas_gov_5361= comunas_gov_clipped.to_crs(epsg=5361)
comunas_gov= comunas_gov.to_crs(5361)
comunas_gov.plot()
<Axes: >
len(comunas_gov.Region)
346
len(set(comunas_gov.Provincia))
56
!pip show shapely
Name: shapely Version: 2.1.1 Summary: Manipulation and analysis of geometric objects Home-page: Author: Sean Gillies Author-email: License: BSD 3-Clause Location: /usr/local/lib/python3.11/dist-packages Requires: numpy Required-by: bigframes, geopandas, google-cloud-aiplatform, libpysal
from shapely.geometry import MultiPolygon
union_total = comunas_gov.unary_union
# Extraer el polígono más grande (Chile continental)
if isinstance(union_total,MultiPolygon):
continente = max(union_total.geoms, key=lambda p: p.area)
else:
continente = union_total
# Filtrar comunas y regiones que intersectan con el continente
comunas_cont = comunas_gov[comunas_gov.intersects(continente)]
# Visualización
ax = comunas_cont.plot(facecolor='lightgrey', edgecolor='black')
comunas_cont[comunas_cont.Region=="Región del Libertador Bernardo O'Higgins"].plot()
<Axes: >
comunas_cont[comunas_cont.Region=="Región del Libertador Bernardo O'Higgins"].union_all()
Region_Libertador=comunas_cont[comunas_cont.Region=="Región del Libertador Bernardo O'Higgins"].union_all()
gpd.GeoDataFrame(index=[0],data={'Region':"Región del Libertador Bernardo O'Higgins"},
crs=comunas_cont.crs,
geometry=[Region_Libertador])
Region | geometry | |
---|---|---|
0 | Región del Libertador Bernardo O'Higgins | MULTIPOLYGON (((284438.205 6143707.002, 284422... |
comunas_cont.plot(facecolor="lightgrey",edgecolor="black",linewidth=0.2)
<Axes: >
# dissolving
comunas_cont.dissolve(by='Region').plot(facecolor='yellow', edgecolor='black',linewidth=0.2)
<Axes: >
indicatorsbyregion=comunas_cont.dissolve(
by="Region",
aggfunc={
"Comuna": "count",
},as_index=False,
)
indicatorsbyregion.plot(column = 'Region')
<Axes: >
Usaremos la cantidad de población por región para el mapa
poblacion=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/datos%20(1).csv")
poblacion.head()
field_1 | field_2 | field_3 | |
---|---|---|---|
0 | Unidad territorial | Variable | 2023 |
1 | Chile | Población total | 19960889 |
2 | Región De Arica Y Parinacota | Población total | 259802.0 |
3 | Región De Tarapacá | Población total | 401588.0 |
4 | Región De Antofagasta | Población total | 714142.0 |
poblacion.rename(columns={
'field_1': 'Region',
'field_2': 'Pobl','field_3': 'Cant'
}, inplace=True)
poblacion.head()
Region | Pobl | Cant | |
---|---|---|---|
0 | Unidad territorial | Variable | 2023 |
1 | Chile | Población total | 19960889 |
2 | Región De Arica Y Parinacota | Población total | 259802.0 |
3 | Región De Tarapacá | Población total | 401588.0 |
4 | Región De Antofagasta | Población total | 714142.0 |
print(poblacion['Region'].unique())
print(comunas_cont['Region'].unique())
['Unidad territorial' 'Chile' 'Región De Arica Y Parinacota' 'Región De Tarapacá' 'Región De Antofagasta' 'Región De Atacama' 'Región De Coquimbo' 'Región De Valparaíso' 'Región Metropolitana De Santiago' "Región Del Libertador Gral. Bernardo O'higgins" 'Región Del Maule' 'Región del Ñuble' 'Región Del Biobío' 'Región De La Araucanía' 'Región De Los Ríos' 'Región De Los Lagos' 'Región De Aysén Del Gral. Carlos Ibáñez Del Campo' 'Región De Magallanes Y De La Antártica Chilena'] ["Región del Libertador Bernardo O'Higgins" 'Región de La Araucanía' 'Región Metropolitana de Santiago' 'Región de Los Lagos' 'Región de Los Ríos' 'Región del Maule' 'Región de Coquimbo' 'Región de Magallanes y Antártica Chilena' 'Zona sin demarcar' 'Región de Valparaíso' 'Región del Bío-Bío' 'Región de Aysén del Gral.Ibañez del Campo' 'Región de Tarapacá' 'Región de Arica y Parinacota' 'Región de Antofagasta' 'Región de Ñuble' 'Región de Atacama']
poblacion['Region'] = poblacion['Region'].str.strip().str.upper()
comunas_cont['Region'] = comunas_cont['Region'].str.strip().str.upper()
comunas_con_pob = comunas_cont.merge(poblacion, on='Region', how='left')
comunas_con_pob.plot(
column='Cant',
cmap='YlOrRd',
legend=True,
legend_kwds={'title': "Población por región"},
edgecolor='black',
linewidth=0.5,
figsize=(10, 7)
)
<Axes: >
Exercise 3
Select some points from your maps.
Create the convex hull for those points.
Turn the hull into a GDF.
Plot the hull on top of the country.
#airportsCh=gpd.read_file("https://github.com/AdriMA3/introgeodf/raw/refs/heads/main/data/chile-airports.csv")
# bye first row
airports.drop(index=0,inplace=True)
airports.reset_index(drop=True, inplace=True)
keep=['name','type','latitude_deg', 'longitude_deg','elevation_ft','region_name','municipality']
airports=airports.loc[:,keep]
airports.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 519 entries, 0 to 518 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 519 non-null object 1 type 519 non-null object 2 latitude_deg 519 non-null object 3 longitude_deg 519 non-null object 4 elevation_ft 519 non-null object 5 region_name 519 non-null object 6 municipality 519 non-null object dtypes: object(7) memory usage: 28.5+ KB
#remember:
type(airports), type(airports)
(pandas.core.frame.DataFrame, pandas.core.frame.DataFrame)
airports_5361=airports.to_crs(5361)
base=chile_5361.plot(facecolor='lightblue',edgecolor='black',linewidth=0.4,figsize=(5,5))
airports_5361.plot(ax=base)
<Axes: >
# coordinates
centroidX,centroidY=chile_5361.centroid.x.values[0],chile_5361.centroid.y.values[0]
# subsets of medium airports
AirTopLeft=airports_5361[airports_5361['type']=='medium_airport'].cx[:centroidX,centroidY:]
AirTopRight=airports_5361[airports_5361['type']=='medium_airport'].cx[centroidX:,centroidY:]
AirBottomLeft=airports_5361[airports_5361['type']=='medium_airport'].cx[:centroidX,:centroidY]
AirBottomRight=airports_5361[airports_5361['type']=='medium_airport'].cx[centroidX:,:centroidY]
airports_5361.plot()
<Axes: >
base=AirTopLeft.plot(facecolor='grey', alpha=0.4)
AirTopRight.plot(ax=base,facecolor='orange', alpha=0.4)
AirBottomLeft.plot(ax=base,facecolor='green', alpha=0.4)
AirBottomRight.plot(ax=base,facecolor='red', alpha=0.4)
<Axes: >
airports_5361['type'].unique()
array(['large_airport', 'medium_airport', 'small_airport', 'closed', 'heliport'], dtype=object)
medium_airport = airports_5361[airports_5361['type'] == 'medium_airport']
medium_airport.shape # para ver cuántos hay
medium_airport.plot() # para visualizar
<Axes: >
medium_airport=airports_5361[airports_5361['type']=='medium_airport']
medium_airport.union_all().convex_hull
medium_airport_hull= gpd.GeoDataFrame(index=[0],
crs=medium_airport.crs,
geometry=[medium_airport.union_all().convex_hull])
medium_airport_hull['name']='medium airport hull' # optional
# then
medium_airport_hull
geometry | name | |
---|---|---|
0 | POLYGON ((588021.422 3912012.072, -3697936.924... | medium airport hull |
base=chile_5361.plot(facecolor='lightgreen')
medium_airport.plot(ax=base)
medium_airport_hull.plot(ax=base,facecolor='green',
edgecolor='white',alpha=0.4,
hatch='X')#se dice que esa la zona con más influencia de chile
<Axes: >
Exercise 4
Apply two spatial overlays to your maps. If possible. If not, try the codes below.
# the north
ComN_chile=comunas_cont.cx[:,centroidY:]
# the south
ComS_chile=comunas_cont.cx[:,:centroidY]
# the west
ComW_chile=comunas_cont.cx[:centroidX,:]
# the east
ComE_chile=comunas_cont.cx[centroidX:,:]
base=ComN_chile.plot(facecolor='yellow', edgecolor='black',linewidth=0.2, alpha=0.6)
ComS_chile.plot(facecolor='grey', edgecolor='black',linewidth=0.2,ax=base, alpha=0.4)
<Axes: >
base=ComE_chile.plot(facecolor='yellow', edgecolor='black',linewidth=0.2, alpha=0.6)
ComW_chile.plot(facecolor='grey', edgecolor='black',linewidth=0.2,ax=base, alpha=0.4)
<Axes: >
ComNS_chile=ComN_chile.overlay(ComS_chile, how="intersection",keep_geom_type=True)
ComNS_chile.plot()
<Axes: >
# keeping the overlay
ComWE_chile=ComW_chile.overlay(ComE_chile, how="intersection",keep_geom_type=True)
ComWE_chile.plot(edgecolor='white',linewidth=0.1)
<Axes: >
ComNS_chile.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 27 entries, 0 to 26 Data columns (total 23 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 objectid_1 27 non-null int64 1 shape_leng_1 27 non-null float64 2 dis_elec_1 27 non-null int32 3 cir_sena_1 27 non-null int32 4 cod_comuna_1 27 non-null int32 5 codregion_1 27 non-null int32 6 st_area_sh_1 27 non-null float64 7 st_length__1 27 non-null float64 8 Region_1 27 non-null object 9 Comuna_1 27 non-null object 10 Provincia_1 27 non-null object 11 objectid_2 27 non-null int64 12 shape_leng_2 27 non-null float64 13 dis_elec_2 27 non-null int32 14 cir_sena_2 27 non-null int32 15 cod_comuna_2 27 non-null int32 16 codregion_2 27 non-null int32 17 st_area_sh_2 27 non-null float64 18 st_length__2 27 non-null float64 19 Region_2 27 non-null object 20 Comuna_2 27 non-null object 21 Provincia_2 27 non-null object 22 geometry 27 non-null geometry dtypes: float64(6), geometry(1), int32(8), int64(2), object(6) memory usage: 4.1+ KB
ComWE_chile.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 119 entries, 0 to 118 Data columns (total 23 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 objectid_1 119 non-null int64 1 shape_leng_1 119 non-null float64 2 dis_elec_1 119 non-null int32 3 cir_sena_1 119 non-null int32 4 cod_comuna_1 119 non-null int32 5 codregion_1 119 non-null int32 6 st_area_sh_1 119 non-null float64 7 st_length__1 119 non-null float64 8 Region_1 119 non-null object 9 Comuna_1 119 non-null object 10 Provincia_1 119 non-null object 11 objectid_2 119 non-null int64 12 shape_leng_2 119 non-null float64 13 dis_elec_2 119 non-null int32 14 cir_sena_2 119 non-null int32 15 cod_comuna_2 119 non-null int32 16 codregion_2 119 non-null int32 17 st_area_sh_2 119 non-null float64 18 st_length__2 119 non-null float64 19 Region_2 119 non-null object 20 Comuna_2 119 non-null object 21 Provincia_2 119 non-null object 22 geometry 119 non-null geometry dtypes: float64(6), geometry(1), int32(8), int64(2), object(6) memory usage: 17.8+ KB
keep=['Region_1','Comuna_1','geometry']
ComNS_chile=ComNS_chile.loc[:,keep]
ComWE_chile=ComWE_chile.loc[:,keep]
# now
ComNS_chile.overlay(ComWE_chile,how="union",keep_geom_type=True)
Region_1_1 | Comuna_1_1 | Region_1_2 | Comuna_1_2 | geometry | |
---|---|---|---|---|---|
0 | REGIÓN DEL MAULE | Linares | REGIÓN DEL MAULE | Linares | POLYGON ((275006.536 6034162.298, 274991.857 6... |
1 | REGIÓN DEL MAULE | Linares | REGIÓN DEL MAULE | Linares | POLYGON ((275006.536 6034162.298, 274991.857 6... |
2 | REGIÓN DEL MAULE | Linares | REGIÓN DEL MAULE | Colbún | POLYGON ((275006.536 6034162.298, 274991.857 6... |
3 | REGIÓN DEL MAULE | Linares | REGIÓN DEL MAULE | Colbún | POLYGON ((275006.536 6034162.298, 274991.857 6... |
4 | REGIÓN DEL MAULE | Colbún | REGIÓN DEL MAULE | Linares | POLYGON ((274991.857 6034196.532, 274972.302 6... |
... | ... | ... | ... | ... | ... |
146 | NaN | NaN | REGIÓN DEL LIBERTADOR BERNARDO O'HIGGINS | San Fernando | MULTIPOLYGON (((340218.985 6138360.287, 340243... |
147 | NaN | NaN | REGIÓN DEL LIBERTADOR BERNARDO O'HIGGINS | San Fernando | MULTIPOLYGON (((352516.217 6129349.367, 352535... |
148 | NaN | NaN | REGIÓN DEL LIBERTADOR BERNARDO O'HIGGINS | San Fernando | POLYGON ((375878.394 6125841.714, 375827.358 6... |
149 | NaN | NaN | REGIÓN DE ÑUBLE | San Fabián | POLYGON ((298686.514 5962705.251, 298749.648 5... |
150 | NaN | NaN | REGIÓN DE ÑUBLE | Coihueco | POLYGON ((299405.802 5921738.641, 299422.276 5... |
151 rows × 5 columns
# appending
import pandas as pd
pd.concat([ComNS_chile,ComWE_chile],ignore_index=True)
Region_1 | Comuna_1 | geometry | |
---|---|---|---|
0 | REGIÓN DEL MAULE | Linares | POLYGON ((274991.856 6034196.541, 275006.536 6... |
1 | REGIÓN DEL MAULE | Longaví | POLYGON ((247563.377 6036181.585, 247655.341 6... |
2 | REGIÓN DEL MAULE | Pelluhue | POLYGON ((159474.527 6007908.795, 159474.527 6... |
3 | REGIÓN DE ÑUBLE | San Carlos | POLYGON ((209725.034 5988206.085, 209827.84 59... |
4 | REGIÓN DE ÑUBLE | San Carlos | MULTIPOLYGON (((205528.093 5982065.463, 205525... |
... | ... | ... | ... |
141 | REGIÓN DEL LIBERTADOR BERNARDO O'HIGGINS | San Fernando | MULTIPOLYGON (((352516.217 6129349.367, 352535... |
142 | REGIÓN DEL LIBERTADOR BERNARDO O'HIGGINS | San Fernando | POLYGON ((375878.394 6125841.714, 375827.358 6... |
143 | REGIÓN DEL MAULE | Parral | POLYGON ((297046.523 5964801.224, 297075.658 5... |
144 | REGIÓN DE ÑUBLE | San Fabián | POLYGON ((298526.854 5963112.258, 298565.267 5... |
145 | REGIÓN DE ÑUBLE | Coihueco | POLYGON ((299405.802 5921738.641, 299422.276 5... |
146 rows × 3 columns
ComNS_chile.overlay(ComWE_chile, how="union",keep_geom_type=True).dissolve().plot()
<Axes: >
ComMidChile=ComNS_chile.overlay(ComWE_chile, how="union",keep_geom_type=True).dissolve()
ComMidChile
geometry | Region_1_1 | Comuna_1_1 | Region_1_2 | Comuna_1_2 | |
---|---|---|---|---|---|
0 | MULTIPOLYGON (((24620.801 4456669.505, 24579.8... | REGIÓN DEL MAULE | Linares | REGIÓN DEL MAULE | Linares |
# some cleaning
ComMidChile['zone']='middles'
ComMidChile=ComMidChile.loc[:,['zone','geometry']]
ComMidChile
zone | geometry | |
---|---|---|
0 | middles | MULTIPOLYGON (((24620.801 4456669.505, 24579.8... |
# with the comunas
comunas_cont.overlay(ComMidChile, how='difference').plot()
<Axes: >
ComW_chile.overlay(ComMidChile, how="symmetric_difference",keep_geom_type=True).plot()
<Axes: >
vamos a tratar de arreglarlo para que se vea mejor¶
# non valid
#ComS_chile[~ComS_chile.is_valid]#poligonos no validos
# what is wrong?
#from shapely.validation import explain_validity, make_valid
#explain_validity(ComS_chile[~ComS_chile.is_valid].geometry)
# varieties?
#ComS_chile['validity']=[x.split('[')[0] for x in ComS_chile.geometry.apply(lambda x: explain_validity(x))]
#ComS_chile['validity'].value_counts()
# solving the issue:#salvando
#ComS_chile.drop(columns=['validity'],inplace=True)
#ComS_chile_valid=ComS_chile.copy()
#ComS_chile_valid['geometry'] = [make_valid(row) if not row.is_valid else row for row in ComS_chile_valid['geometry'] ]
#any invalid?
#ComS_chile_valid[~ComS_chile_valid.is_valid]
#pd.Series([type(x) for x in ComS_chile_valid.geometry]).value_counts()#a veces crea poligonos y geometry collection
# solving the issue:
#ComN_chile_valid=ComN_chile.copy()
#ComE_chile_valid=ComE_chile.copy()
#ComW_chile_valid=ComW_chile.copy()
#ComN_chile_valid['geometry'] = [make_valid(row) if not row.is_valid else row for row in ComN_chile_valid['geometry'] ]
#ComE_chile_valid['geometry'] = [make_valid(row) if not row.is_valid else row for row in ComE_chile_valid['geometry'] ]
#ComW_chile_valid['geometry'] = [make_valid(row) if not row.is_valid else row for row in ComW_chile_valid['geometry'] ]
#ComN_chile_cleaned = ComN_chile_valid.copy()
#ComS_chile_cleaned = ComS_chile_valid.copy()
#ComE_chile_cleaned = ComE_chile_valid.copy()
#ComW_chile_cleaned = ComW_chile_valid.copy()
# Apply buffer(0) to potentially fix remaining issues
#ComN_chile_cleaned['geometry'] = ComN_chile_cleaned.geometry.buffer(0)
#ComS_chile_cleaned['geometry'] = ComS_chile_cleaned.geometry.buffer(0)
#ComE_chile_cleaned['geometry'] = ComE_chile_cleaned.geometry.buffer(0)
#ComW_chile_cleaned['geometry'] = ComW_chile_cleaned.geometry.buffer(0)
# Try the overlay with the cleaned dataframes
#ComN_chile_cleaned.overlay(ComS_chile_cleaned, how="symmetric_difference", keep_geom_type=True).plot()
#ComE_chile_cleaned.overlay(ComW_chile_cleaned, how="symmetric_difference", keep_geom_type=True).plot()