Eliminate Overlapping Dates

sql-server

I was looking for an efficient way to eliminate overlapping days when provided with a historical table that provided events that could overlap. In my case, I had dates show the range of a process. However, the multiple start and end dates could overlap, and even run concurrently. To eliminate double counting the days the process truly was in play I needed a way to find eliminate the overlap, and eliminate duplicate days when running in parallel. I researched ways to complete this and found the solution through this post. Solutions to Packing Date and Time Intervals Puzzle

Itzik provided an excellent solution, though I had to take time to digest. The only problem I ran into, was his solution was focused on a single user and dates. For my purposes, I need to evaluate an account and further break it down by overlap on a particular process. Grateful for SQL MVP’s contributions to the community as this was a brain bender!

Example of Problem and Solution with Overlapping Dates

Eliminate Overlapping Dates, with subgroupings (such as overlap only eliminated at process > task level, instead of just process level)
if object_id('tempdb..#Awesome') is not null
	drop table #awesome;

create table #awesome
	(
	unique_join_k	int
	,process			varchar(10)
	,start_date		date
	,end_date		date
	,grouper_id		as convert(uniqueidentifier, hashbytes('sha1',
	isnull(cast(unique_join_k as varchar(max)), '|')
	+ '|' + isnull(cast(process as varchar(max)), '|')
	))
)

insert into #awesome
select
	*
from
	(
	values
	(1000, 'process 1', cast('20131006' as date), cast('20131213' as date)),
	(1000, 'process 1', cast('20131225' as date), cast('20140215' as date)),
	(1000, 'process 1', cast('20140123' as date), cast('20140215' as date)),
	(1000, 'process 2', cast('20140328' as date), cast('20140617' as date)),
	(1001, 'process 1', cast('20130214' as date), cast('20130421' as date)),
	(1001, 'process 1', cast('20130827' as date), cast('20130926' as date)),
	(1001, 'process 1', cast('20131211' as date), cast('20131221' as date)),
	(1001, 'process 1', cast('20140227' as date), cast('20140405' as date)),
	(1002, 'process 1', cast('20130518' as date), cast('20130622' as date)),
	(1002, 'process 2', cast('20130527' as date), cast('20130618' as date)),
	(1003, 'process 1', cast('20130312' as date), cast('20130428' as date)),
	(1003, 'process 1', cast('20130510' as date), cast('20130614' as date)),
	(1003, 'process 1', cast('20130725' as date), cast('20131115' as date)),
	(1003, 'process 1', cast('20131010' as date), cast('20131014' as date)),
	(1003, 'process 2', cast('20140413' as date), cast('20140614' as date)),
	(1004, 'process 1', cast('20130116' as date), cast('20130425' as date)),
	(1004, 'process 1', cast('20130828' as date), cast('20130926' as date)),
	(1004, 'process 1', cast('20140101' as date), cast('20140210' as date)),
	(1004, 'process 1', cast('20140312' as date), cast('20140408' as date)),
	(1004, 'process 2', cast('20140513' as date), cast('20140806' as date)),
	(1004, 'process 2', cast('20140515' as date), cast('20140818' as date)),
	(1005, 'process 1', cast('20130606' as date), cast('20130724' as date)),
	(1005, 'process 2', cast('20131125' as date), cast('20140217' as date)),
	(1005, 'process 3', cast('20131220' as date), cast('20140131' as date)),
	(1005, 'process 3', cast('20140503' as date), cast('20140605' as date))) as vtable (unique_join_k, process, start_date, end_date);

select
	*
from
	#awesome as a
order by
	a.unique_join_k
	,a.[start_date];

with c1 as -- let e = end ordinals, let s = start ordinals
		(
			select
				grouper_id
				,unique_join_k
				,time_start =	start_date
				,is_start =		1
				,sqid_end =		null
				,sqid_start =	row_number() over (partition by grouper_id order by start_date)
			from
				#awesome cs
			union all
			select
				grouper_id
				,unique_join_k
				,time_start =	end_date
				,is_start =		0
				,sqid_end =		row_number() over (partition by grouper_id order by end_date)
				,sqid_start =	null
			from
				#awesome cs
		),
	c2 as (-- let se = start or end ordinal, namely, how many events (start or end) happened so far
			select
				c1.*
				,sqid_all_is_starts =	row_number() over (partition by grouper_id order by time_start, is_start desc)
			from
				c1
		), --select * from c2
	c3 as (
			/*******************************************************
                For start events, the expression sqid_start - (sqid_all_is_starts - sqid_start) - 1 represents how many sessions were active
                just before the current (hence - 1)

                For end events, the expression (sqid_all_is_starts - sqid_end) - sqid_end ) represents how many sessions are active
                right after this one

                The above two expressions are 0 exactly when a group of packed intervals 
                either starts or ends, respectively

                After filtering only events when a group of packed intervals either starts or ends,
                group each pair of adjacent start/end events
                *******************************************************/
			select
				grouper_id
				,unique_join_k
				,time_start
				,date_grouping =	floor((row_number() over (partition by grouper_id order by time_start) - 1) / 2 + 1)
			from
				c2
			where
				coalesce(sqid_start - (sqid_all_is_starts - sqid_start) - 1,
				(sqid_all_is_starts - sqid_end) - sqid_end) = 0
		) -- select * from c3

/*******************************************************
OVERLAPPING DATES ON SAME GROUP RESOLVED IN DISTINCT RANGES
*******************************************************/
,
	overlappedgone as (
			select
				*
				,sqid =	dense_rank() over (partition by grouper_id, date_grouping order by time_start asc)
			from
				c3

		)
select
	
	unique_join_k_start =			og1.unique_join_k 
	,unique_join_k_end =			og2.unique_join_k
	,calc_start_date =				og1.time_start
	,calc_end_date =				og2.time_start
	,unique_non_overlapping_days =	datediff(day, og1.time_start, og2.time_start)
	,grouper_id =					og1.grouper_id
from
	overlappedgone og1

	inner join overlappedgone og2
		on og1.date_grouping = og2.date_grouping
		and og1.grouper_id = og2.grouper_id
		and og2.sqid = 2
where
	og1.sqid = 1
order by
	og1.unique_join_k
	,calc_start_date