Social Transformative Capacity ¶
Alexander Dunkel, Leibniz Institute of Ecological Urban and Regional Development,
Transformative Capacities & Research Data Centre (IÖR-FDZ)
Base data:
- 50 million Geosocial Media posts
- sorted by user-id, number of posts per user-id ascending
- an LLM is used to classify users
Preparations¶
Load dependencies:
import os
import csv
import sys
import colorcet
import psycopg2 # Postgres API
import geoviews as gv
import holoviews as hv
import mapclassify as mc
import geopandas as gp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geoviews.feature as gf
from pathlib import Path
from collections import namedtuple
from typing import List, Tuple, Dict, Optional
from pyproj import Transformer, CRS, Proj
from geoviews import opts
from shapely.geometry import shape, Point, Polygon
from shapely.ops import transform
from holoviews import dim
from cartopy import crs
from matplotlib import colors
from IPython.display import clear_output, display, HTML, Markdown, Code
from bokeh.models import HoverTool, FuncTickFormatter, FixedTicker
# optionally, enable shapely.speedups,
# which makes some of the spatial
# queries run faster
import shapely.speedups as speedups
from shapely import geometry
Activate autoreload of changed python files:
%load_ext autoreload
%autoreload 2
Load helper module.
module_path = str(Path.cwd().parents[0] / "py")
if module_path not in sys.path:
sys.path.append(module_path)
from modules.base import tools, preparations
Initialize Bokeh and shapely.speedups
preparations.init_imports()
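preparations.init_imports() is a project helper; a minimal sketch of what it presumably does, assuming it only activates the Bokeh plotting backend for HoloViews and enables shapely speedups:
# Hypothetical equivalent of preparations.init_imports()
def init_imports():
    """Enable the Bokeh backend for HoloViews and shapely speedups."""
    hv.notebook_extension("bokeh")
    if speedups.available:
        speedups.enable()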
Parameters¶
Define initial parameters that affect processing
WORK_DIR = Path.cwd().parents[0] / "tmp" # Working directory
EPSG_CODE = 25832 # Target projection EPSG Code
CRS_PROJ = f"epsg:{EPSG_CODE}" # Target projection
CRS_WGS = "epsg:4326" # Input projection (Web Mercator)
OUTPUT = Path.cwd().parents[0] / "out" # Define path to output directory (figures etc.)
SHAPE_DIR = OUTPUT / "shapes" # Shapefiles
WORK_DIR.mkdir(exist_ok=True)
tools.create_paths(OUTPUT)
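The pyproj Transformer imported above can translate coordinates between the input and target projections. A quick sanity check (illustrative only; the lon/lat pair is an assumed point near Berlin):
# Project a WGS 1984 lon/lat pair to UTM zone 32N (EPSG:25832)
transformer = Transformer.from_crs(CRS_WGS, CRS_PROJ, always_xy=True)
transformer.transform(13.405, 52.52)  # lon, lat near the Berlin city centre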
City shapes¶
We will get the city shape areas from the BKG product "VG250-EW", see Verwaltungsgebiete.
zip_citybd = "https://daten.gdz.bkg.bund.de/produkte/vg/vg250-ew_ebenen_1231/aktuell/vg250-ew_12-31.utm32s.gpkg.ebenen.zip"
tools.get_zip_extract(
uri_filename=zip_citybd, output_path=SHAPE_DIR)
vg_shapes = SHAPE_DIR / "vg250-ew_12-31.utm32s.gpkg.ebenen" / "vg250-ew_ebenen_1231" / "DE_VG250.gpkg"
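tools.get_zip_extract() comes from the helper module; a minimal sketch of the download-and-extract step it performs (assuming no caching or checksum logic in this simplified version) might look like this:
# Minimal download-and-extract sketch (the real helper may cache results)
import urllib.request
import zipfile

def get_zip_extract(uri_filename: str, output_path: Path):
    """Download a zip file and extract its contents to output_path."""
    output_path.mkdir(exist_ok=True, parents=True)
    zip_path = output_path / Path(uri_filename).name
    urllib.request.urlretrieve(uri_filename, zip_path)
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(output_path)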
Load from file and plot preview
shapes = gp.read_file(vg_shapes, layer="vg250_vwg")
ax = shapes.plot()
ax.set_axis_off()
Select all larger cities¶
tools.drop_cols_except(df=shapes, columns_keep=["geometry", "EWZ", "GEN"])
shapes.head()
Select all cities with more than 100,000 inhabitants (column EWZ, the population count):
lcities_shapes = shapes[shapes["EWZ"] > 100000]
Load the Bundesländer (federal states) shapes, for plotting reference.
de_shapes = tools.get_shapes("de", shape_dir=SHAPE_DIR)
de_shapes = de_shapes.to_crs(f"EPSG:{EPSG_CODE}")
Plot a preview in two layers, highlighting the selected larger cities.
plt_kwargs = {
    "color": "none",
    "edgecolor": "black",
    "linewidth": 0.2,
    "figsize": (2, 4),
}
ax = de_shapes.plot(**plt_kwargs)
ax = lcities_shapes.plot(ax=ax, color="red")
ax.set_axis_off()
lcities_shapes.head()
Dissolve shapes by the GEN column (city reference):
lcities_shapes = lcities_shapes.dissolve(by='GEN')
len(lcities_shapes)
The count matches the 83 large cities specified in the Wikipedia article. We now have our base for measuring the sphere of influence of cities.
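Optionally, pin this expectation, so the notebook fails early if the upstream BKG data changes:
# Sanity check: expect 83 cities with more than 100,000 inhabitants
assert len(lcities_shapes) == 83, "Unexpected number of large cities"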
Intersect with Social Media Data¶
Test city: Berlin¶
Select a single city from our previous dataset: Berlin
berlin_shp = lcities_shapes[lcities_shapes.index=="Berlin"]
ax = berlin_shp.plot(**plt_kwargs)
ax.set_axis_off()
Simplify the shape with a 1000 m tolerance.
berlin_shp = berlin_shp.simplify(tolerance=1000)
ax = berlin_shp.plot(**plt_kwargs)
ax.set_axis_off()
Get the Well-Known Text (WKT) from the shape, for use in the PostGIS query. Note that we project the shape back to WGS 1984, as this is what PostGIS expects for the respective column projection.
berlin_wkt = berlin_shp.to_crs(CRS_WGS).iloc[0].wkt
berlin_wkt[:50]
Load the database password from a .env file and connect to the Postgres backend:
from dotenv import load_dotenv
load_dotenv(
Path.cwd().parents[0] / '.env', override=True)
db_user = "postgres"
db_pass = os.getenv('POSTGRES_PASSWORD')
db_host = "hlldb"
db_port = "5432"
db_name = "hlldb"
db_connection_hll = psycopg2.connect(
host=db_host,
port=db_port,
dbname=db_name,
user=db_user,
password=db_pass
)
db_conn_hll = tools.DbConn(db_connection_hll)
cur_hll = db_connection_hll.cursor()
cur_hll.execute("SELECT 1;")
print(cur_hll.statusmessage)
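tools.DbConn is a small wrapper from the project's helper module. A minimal sketch of what its query() method presumably does, assuming it simply wraps pandas:
# Hypothetical minimal version of the tools.DbConn wrapper
class DbConn:
    def __init__(self, db_connection):
        self.db_connection = db_connection

    def query(self, sql_query: str) -> pd.DataFrame:
        """Run a SQL query and return the result as a DataFrame."""
        return pd.read_sql_query(sql_query, self.db_connection)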
MVIEWS_REF = "all_posts_de"
Check if the foreign table has been imported already:
sql_query = f"""
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_schema = 'mviews'
AND table_name = '{MVIEWS_REF}'
);
"""
result = db_conn_hll.query(sql_query)
result["exists"][0]
PostGIS SQL Query¶
Select all posts whose post_latlng lies within berlin_wkt. Remove the LIMIT 10 after testing.
%%time
cols = "origin_id, post_body, hashtags, emoji"
sql_query = f"""
CREATE MATERIALIZED VIEW mviews.all_posts_de_berlin AS
SELECT {cols}
FROM mviews.{MVIEWS_REF} p1
WHERE
ST_Intersects(p1.post_latlng,
ST_GeographyFromText(
'SRID=4326;
{berlin_wkt}
'))
--LIMIT 10
;
"""
# samples = db_conn_hll.query(sql_query)
Since this is a long-running query, you may want to connect directly to the Postgres backend and issue the command below in a terminal multiplexer (e.g. byobu).
Code(sql_query, language='sql')
E.g.:
byobu                                        # start a terminal multiplexer session
docker exec -it lbsn-hlldb /bin/bash         # enter the hlldb container
psql -h localhost -p 5432 -U postgres hlldb  # open a psql session
...
[F6]                                         # detach from the byobu session
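Once the materialized view has been created on the backend, load a small sample back into the notebook (the view name follows the pattern defined above):
samples = db_conn_hll.query(
    f"SELECT * FROM mviews.{MVIEWS_REF}_berlin LIMIT 10;")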
samples.head()
Create notebook HTML¶
!jupyter nbconvert --to html_toc \
--output-dir=../resources/html/ ./04_influence_sphere_cities.ipynb \
--template=../nbconvert.tpl \
--ExtractOutputPreprocessor.enabled=False >&- 2>&-