WITH SessionEvents AS ( SELECT event_name, -- Bereinige die URL von gängigen Tracking-Parametern REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r'([&?])gclid=[^&]*(&|$)', '\\1'), r'([&?])msclkid=[^&]*(&|$)', '\\1' ), r'([&?])utm_source=[^&]*(&|$)', '\\1' ), r'([&?])utm_medium=[^&]*(&|$)', '\\1' ), r'([&?])utm_campaign=[^&]*(&|$)', '\\1' ), r'([&?])utm_content=[^&]*(&|$)', '\\1' ), r'([&?])utm_term=[^&]*(&|$)', '\\1' ), r'([&?])source=[^&]*(&|$)', '\\1' ), -- Entferne überflüssige & oder ? am Ende der URL r'[&?]$', '' ) AS page_location, (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'campaign') AS campaign, (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'medium') AS medium FROM `dein-bq-projekt-und-dataset.events_*` WHERE event_name = 'session_start' AND _TABLE_SUFFIX BETWEEN '20250201' AND FORMAT_DATE('%Y%m%d', CURRENT_DATE()) AND (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'medium') = 'cpc' ), DuplicateLocations AS ( SELECT page_location, COUNT(DISTINCT campaign) AS distinct_campaigns, COUNT(*) AS total_events, STRING_AGG(DISTINCT campaign, ', ' ORDER BY campaign) AS campaign_list FROM SessionEvents GROUP BY page_location HAVING COUNT(DISTINCT campaign) > 1 ) SELECT d.page_location, d.distinct_campaigns, d.total_events, d.campaign_list, ARRAY_AGG(DISTINCT s.page_location ORDER BY s.page_location) AS original_urls FROM DuplicateLocations d JOIN SessionEvents s ON d.page_location = s.page_location GROUP BY d.page_location, d.distinct_campaigns, d.total_events, d.campaign_list ORDER BY d.total_events DESC;