From affd50f3015c7009a58aae0449615d67d8608362 Mon Sep 17 00:00:00 2001
From: Adrian Courreges <a.courreges@gmail.com>
Date: Fri, 23 Aug 2019 21:43:52 +0900
Subject: [PATCH] SSAO half-resolution integration.

Enable with "r.AmbientOcclusion.HalfRes 1".

Full details and information: http://www.adriancourreges.com/blog/2018/12/02/ue4-optimized-post-effects/
---
 .../Private/PostProcessAmbientOcclusion.usf        |  89 ++++++++++
 .../CompositionLighting/CompositionLighting.cpp    |  38 ++++-
 .../PostProcessAmbientOcclusion.cpp                | 185 ++++++++++++++++++++-
 .../PostProcessAmbientOcclusion.h                  |  19 ++-
 .../Private/PostProcess/SceneRenderTargets.cpp     |   4 +-
 5 files changed, 318 insertions(+), 17 deletions(-)

diff --git a/Engine/Shaders/Private/PostProcessAmbientOcclusion.usf b/Engine/Shaders/Private/PostProcessAmbientOcclusion.usf
index fbafaea4037..db3fc0f7d3c 100644
--- a/Engine/Shaders/Private/PostProcessAmbientOcclusion.usf
+++ b/Engine/Shaders/Private/PostProcessAmbientOcclusion.usf
@@ -170,6 +170,9 @@ SamplerState RandomNormalTextureSampler;
 // .xy:mul .zw:add   scale and bias to convert between BufferUV and HZB-UV
 float4 HZBRemapping;
 
+// Direction of the blur: the AO blur pass sets (1,0) for its horizontal pass and (0,1) for its vertical pass
+float2 BlurDirection;
+
 // could be moved to a more central spot
 // @param ScreenPos -1 .. 1
 float3 ReconstructCSPos(float SceneDepth, float2 ScreenPos)
@@ -574,6 +577,26 @@ float ComputeMipLevel(int sampleid, int step)
 	return log2(HzbStepMipLevelFactorValue * Scale * SamplePos);
 }
 
+// Pack a float into four channels meant for an RGBA8 target; only the fractional part of each scaled value is kept
+float4 EncodeFloatRGBA( float v )
+{
+	const float4 ChannelScale = float4(1.0, 255.0, 255.0 * 255.0, 255.0 * 255.0 * 255.0);
+	float4 Packed = frac(ChannelScale * v);
+	Packed -= Packed.yzww * float4(1.0/255.0, 1.0/255.0, 1.0/255.0, 0.0); // take out of each channel what the next one carries
+	return Packed;
+}
+
+// Unpack RGBA8 to Float32: weighted sum of the channels with weights 1, 1/255, 1/255^2, 1/255^3
+float DecodeFloatRGBA( float4 rgba ) 
+{
+	return dot( rgba, float4(1.0, 1/255.0, 1/(255.0*255.0), 1/(255.0*255.0*255.0)) );
+}
+
+float DecodeFloatRGB( float3 rgb ) // Unpack from three channels only; the fourth is taken as 0
+{
+	return DecodeFloatRGBA( float4(rgb, 0) );
+}
+
 // the main pixel shader that computes ambient occlusion
 void MainPSandCS(in float4 UVAndScreenPos, float4 SvPosition, out float4 OutColor)
 {
@@ -820,6 +843,10 @@ void MainPSandCS(in float4 UVAndScreenPos, float4 SvPosition, out float4 OutColo
 //		OutColor = (WeightA - 0.333f) / 0.666f;
 	}
 #endif
+
+	// Only for half-res path, output extra linear depth into GBA
+	float linearDepth = ConvertFromDeviceZ(LookupDeviceZ(UV)) / ConvertFromDeviceZ(0.0);
+	OutColor.gba = EncodeFloatRGBA( 1.0 - linearDepth ).rgb;
 }
 
 void MainPS(in noperspective float4 UVAndScreenPos : TEXCOORD0, float4 SvPosition : SV_POSITION, out float4 OutColor : SV_Target0)
@@ -883,3 +910,65 @@ void BasePassAOPS(in noperspective float4 UVAndScreenPos : TEXCOORD0, out float4
 
 	OutColor = lerp(1.0f, ScreenSpaceData.AmbientOcclusion * ScreenSpaceData.GBuffer.GBufferAO, AOMask * StaticFraction);
 }
+
+// Apply a directional blur to the AO term (.r); each neighbor's weight is reduced as its depth (decoded from .gba) departs from the center's
+void MainDepthBlur(in noperspective float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
+{
+	float2 CenterUV = UVAndScreenPos.xy;
+	
+	#define DIRECTIONAL_TAP_COUNT 2 // number of taps taken on each side of the center pixel
+
+	// Gaussian kernel, center weight in the middle
+	const float KernelWeights[2 * DIRECTIONAL_TAP_COUNT + 1] = { 0.067, 0.242, 0.383, 0.242, 0.067};
+	float4 KernelTaps[2 * DIRECTIONAL_TAP_COUNT + 1];
+	
+	// Center pixel: AO in .r, packed depth in .gba
+	float4 CenterTap = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, CenterUV);
+	float CenterDepth = DecodeFloatRGB(CenterTap.gba);
+	KernelTaps[DIRECTIONAL_TAP_COUNT] = CenterTap;
+	
+	float BlurredAO = 0;
+
+#if 1 // Depth-aware blur, adjust weight depending on depth
+	float WeightSum = KernelWeights[DIRECTIONAL_TAP_COUNT];
+	BlurredAO = CenterTap.r * WeightSum;
+	
+	for (float Side = -1.0f; Side <= 1.0f; Side += 2.0f) // negative side first, then positive side
+	{
+		for (int TapIndex = 1; TapIndex <= DIRECTIONAL_TAP_COUNT; TapIndex++)
+		{
+			// Offset the UV by TapIndex steps along the blur axis (PostprocessInput0Size.zw is presumably the inverse input size)
+			float2 TapUV = CenterUV + TapIndex * (Side * BlurDirection.xy * PostprocessInput0Size.zw);
+			
+			float4 Tap = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, TapUV);
+			float TapDepth = DecodeFloatRGB(Tap.gba);
+			// Scaled depth gap between center and tap; the bigger it is, the less the tap counts
+			float DepthGap = (CenterDepth - TapDepth) * 10.0f; // TODO expose this
+			float TapWeight = KernelWeights[DIRECTIONAL_TAP_COUNT + TapIndex] / (1.0f + abs(DepthGap)); // kernel is symmetric, so its upper half serves both sides
+			BlurredAO += Tap.r * TapWeight;
+			WeightSum += TapWeight;
+		}
+	}
+	
+	BlurredAO /= WeightSum;
+	
+#else // Non-depth-aware blur (standard gaussian)
+	for (int Side = -1; Side <= 1; Side += 2) // along both directions
+	{
+		for (int TapIndex = 1; TapIndex <= DIRECTIONAL_TAP_COUNT; TapIndex++)
+		{
+			float2 TapUV = CenterUV;
+			TapUV += TapIndex * (Side * BlurDirection.xy * PostprocessInput0Size.zw);
+			KernelTaps[DIRECTIONAL_TAP_COUNT + (Side * TapIndex)] = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, TapUV);
+		}
+	}
+	
+	for (int i = 0; i < 2 * DIRECTIONAL_TAP_COUNT + 1; i++)
+	{
+		BlurredAO += KernelTaps[i].r * KernelWeights[i];
+	}
+#endif
+	
+	OutColor = CenterTap;
+	OutColor.r = BlurredAO;
+}
\ No newline at end of file
diff --git a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/CompositionLighting.cpp b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/CompositionLighting.cpp
index 61db37bd87c..93f60d85bb6 100644
--- a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/CompositionLighting.cpp
+++ b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/CompositionLighting.cpp
@@ -139,6 +139,8 @@ static FRenderingCompositeOutputRef AddPostProcessingAmbientOcclusion(FRHIComman
 {
 	check(Levels >= 0 && Levels <= 3);
 
+	bool IsSSAOHalfRes = FSSAOHelper::IsAmbientOcclusionHalfRes();
+
 	FRenderingCompositePass* AmbientOcclusionInMip1 = nullptr;
 	FRenderingCompositePass* AmbientOcclusionInMip2 = nullptr;
 	FRenderingCompositePass* AmbientOcclusionPassMip1 = nullptr; 
@@ -205,27 +207,45 @@ static FRenderingCompositeOutputRef AddPostProcessingAmbientOcclusion(FRHIComman
 		GBufferA = Context.Graph.RegisterPass(new(FMemStack::Get()) FRCPassPostProcessInput(SceneContext.GBufferA));
 	}
 
-	FRenderingCompositePass* AmbientOcclusionPassMip0 = Context.Graph.RegisterPass(new(FMemStack::Get()) FRCPassPostProcessAmbientOcclusion(Context.View, FullResAOType, false));
+	FRenderingCompositePass* AmbientOcclusionPassMip0 = Context.Graph.RegisterPass(new(FMemStack::Get()) FRCPassPostProcessAmbientOcclusion(Context.View, FullResAOType, false, IsSSAOHalfRes));
 	AmbientOcclusionPassMip0->SetInput(ePId_Input0, GBufferA);
 	AmbientOcclusionPassMip0->SetInput(ePId_Input1, AmbientOcclusionInMip1);
 	AmbientOcclusionPassMip0->SetInput(ePId_Input2, AmbientOcclusionPassMip1);
 	AmbientOcclusionPassMip0->SetInput(ePId_Input3, HZBInput);
 
-	// to make sure this pass is processed as well (before), needed to make process decals before computing AO
-	if(AmbientOcclusionInMip1)
+	SceneContext.bScreenSpaceAOIsValid = true;
+
+	if (!IsSSAOHalfRes)
 	{
-		AmbientOcclusionInMip1->AddDependency(Context.FinalOutput);
+		// to make sure this pass is processed as well (before), needed to make process decals before computing AO
+		if (AmbientOcclusionInMip1)
+		{
+			AmbientOcclusionInMip1->AddDependency(Context.FinalOutput);
+		}
+		else
+		{
+			AmbientOcclusionPassMip0->AddDependency(Context.FinalOutput);
+		}
+
+		Context.FinalOutput = FRenderingCompositeOutputRef(AmbientOcclusionPassMip0);
+
+		return FRenderingCompositeOutputRef(AmbientOcclusionPassMip0);
 	}
 	else
 	{
-		AmbientOcclusionPassMip0->AddDependency(Context.FinalOutput);
-	}
+		// Half-res path
+		FRenderingCompositePass* AmbientOcclusionBlur1 = Context.Graph.RegisterPass(new(FMemStack::Get()) FRCPassPostProcessAOBlur(true));
+		AmbientOcclusionBlur1->SetInput(ePId_Input0, AmbientOcclusionPassMip0);
 
-	Context.FinalOutput = FRenderingCompositeOutputRef(AmbientOcclusionPassMip0);
+		FRenderingCompositePass* AmbientOcclusionBlur2 = Context.Graph.RegisterPass(new(FMemStack::Get()) FRCPassPostProcessAOBlur(false));
+		AmbientOcclusionBlur2->SetInput(ePId_Input0, AmbientOcclusionBlur1);
 
-	SceneContext.bScreenSpaceAOIsValid = true;
+		AmbientOcclusionBlur2->AddDependency(Context.FinalOutput);
+
+		Context.FinalOutput = FRenderingCompositeOutputRef(AmbientOcclusionBlur2);
 
-	return FRenderingCompositeOutputRef(AmbientOcclusionPassMip0);
+		return FRenderingCompositeOutputRef(AmbientOcclusionBlur2);
+	}
 }
 
 void FCompositionLighting::ProcessBeforeBasePass(FRHICommandListImmediate& RHICmdList, FViewInfo& View, bool bDBuffer, uint32 SSAOLevels)
diff --git a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.cpp b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.cpp
index d920cce8e1a..4e5bd4b6d06 100644
--- a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.cpp
+++ b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.cpp
@@ -86,6 +86,15 @@ static TAutoConsoleVariable<int32> CVarAmbientOcclusionDepthBoundsTest(
 	TEXT("Whether to use depth bounds test to cull distant pixels during AO pass. This option is only valid when pixel shader path is used (r.AmbientOcclusion.Compute=0), without upsampling."),
 	ECVF_RenderThreadSafe);
 
+static TAutoConsoleVariable<int32> CVarAmbientOcclusionHalfRes(
+	TEXT("r.AmbientOcclusion.HalfRes"),
+	0, // default: full-resolution SSAO
+	TEXT("Computes SSAO at half-resolution, smoothed with a depth-aware blur.\n")
+	TEXT("Useful when the full-resolution path is still too slow at minimum quality. (Can be twice as fast)\n")
+	TEXT(" 0: disabled (default, full resolution SSAO)\n")
+	TEXT(" 1: enabled (half-resolution SSAO)"),
+	ECVF_Scalability | ECVF_RenderThreadSafe);
+
 float FSSAOHelper::GetAmbientOcclusionQualityRT(const FSceneView& View)
 {
 	float CVarValue = CVarAmbientOcclusionMaxQuality.GetValueOnRenderThread();
@@ -164,6 +173,10 @@ bool FSSAOHelper::IsAmbientOcclusionAsyncCompute(const FViewInfo& View, uint32 A
 	return false;
 }
 
+bool FSSAOHelper::IsAmbientOcclusionHalfRes() // @return true when r.AmbientOcclusion.HalfRes is enabled (reads the render-thread value)
+{
+	return CVarAmbientOcclusionHalfRes.GetValueOnRenderThread() != 0; // any non-zero value enables it, the same test used when sizing the ScreenSpaceAO buffer
+}
 // @return 0:off, 0..3
 uint32 FSSAOHelper::ComputeAmbientOcclusionPassCount(const FViewInfo& View)
 {
@@ -702,9 +715,10 @@ void FRCPassPostProcessAmbientOcclusion::DispatchCS(TRHICmdList& RHICmdList, con
 
 // --------------------------------------------------------
 
-FRCPassPostProcessAmbientOcclusion::FRCPassPostProcessAmbientOcclusion(const FSceneView& View, ESSAOType InAOType, bool bInAOSetupAsInput)
+FRCPassPostProcessAmbientOcclusion::FRCPassPostProcessAmbientOcclusion(const FSceneView& View, ESSAOType InAOType, bool bInAOSetupAsInput, bool bInHalfRes)
 	: AOType(InAOType)
-	, bAOSetupAsInput(bInAOSetupAsInput)	
+	, bAOSetupAsInput(bInAOSetupAsInput)
+	, bHalfRes(bInHalfRes)
 {
 }
 
@@ -779,7 +793,7 @@ void FRCPassPostProcessAmbientOcclusion::ProcessPS(FRenderingCompositePassContex
 	const FIntRect& ViewRect, const FIntPoint& TexSize, int32 ShaderQuality, bool bDoUpsample)
 {
 	// We do not support the depth bounds optimization if we are in MSAA. To do so we would have to resolve the depth buffer here OR use a multisample texture for our AO target.
-	const bool bDepthBoundsTestEnabled = GSupportsDepthBoundsTest && SceneDepthBuffer && CVarAmbientOcclusionDepthBoundsTest.GetValueOnRenderThread() && SceneDepthBuffer->TargetableTexture->GetNumSamples() == 1;
+	const bool bDepthBoundsTestEnabled = !bHalfRes && GSupportsDepthBoundsTest && SceneDepthBuffer && CVarAmbientOcclusionDepthBoundsTest.GetValueOnRenderThread() && SceneDepthBuffer->TargetableTexture->GetNumSamples() == 1;
 
 	// Set the view family's render target/viewport.
 
@@ -898,7 +912,7 @@ void FRCPassPostProcessAmbientOcclusion::Process(FRenderingCompositePassContext&
 	const FSceneRenderTargetItem* DestRenderTarget = 0;
 	FSceneRenderTargets& SceneContext = FSceneRenderTargets::Get(Context.RHICmdList);
 
-	if(bAOSetupAsInput)
+	if(bAOSetupAsInput || bHalfRes)
 	{
 		DestRenderTarget = &PassOutputs[0].RequestSurface(Context);
 	}
@@ -913,7 +927,8 @@ void FRCPassPostProcessAmbientOcclusion::Process(FRenderingCompositePassContext&
 	// usually 1, 2, 4 or 8
 	uint32 ScaleToFullRes = SceneContext.GetBufferSizeXY().X / TexSize.X;
 
-	FIntRect ViewRect = FIntRect::DivideAndRoundUp(View.ViewRect, ScaleToFullRes);
+	uint32 HalfResFactor = bHalfRes ? 2 : 1;
+	FIntRect ViewRect = FIntRect::DivideAndRoundUp(View.ViewRect, ScaleToFullRes * HalfResFactor);
 
 	// 0..4, 0:low 4:high
 	const int32 ShaderQuality = FSSAOHelper::GetAmbientOcclusionShaderLevel(Context.View);
@@ -929,7 +944,7 @@ void FRCPassPostProcessAmbientOcclusion::Process(FRenderingCompositePassContext&
 			? &SceneContext.SceneDepthZ->GetRenderTargetItem()
 			: nullptr;
 
-		ProcessPS(Context, DestRenderTarget, SceneDepthBuffer, ViewRect, TexSize, ShaderQuality, bDoUpsample);
+		ProcessPS(Context, DestRenderTarget, SceneDepthBuffer, ViewRect, TexSize / HalfResFactor, ShaderQuality, bDoUpsample);
 	}
 	else
 	{
@@ -939,6 +954,16 @@ void FRCPassPostProcessAmbientOcclusion::Process(FRenderingCompositePassContext&
 
 FPooledRenderTargetDesc FRCPassPostProcessAmbientOcclusion::ComputeOutputDesc(EPassOutputId InPassOutputId) const
 {
+	if (bHalfRes) {
+		FPooledRenderTargetDesc Ret = GetInput(ePId_Input0)->GetOutput()->RenderTargetDesc;
+
+		Ret.Reset();
+		Ret.Extent = FIntPoint::DivideAndRoundUp(Ret.Extent, 2);
+
+		Ret.DebugName = TEXT("AmbientOcclusionHalfRaw");
+		return Ret;
+	}
+
 	if(!bAOSetupAsInput)
 	{
 		FPooledRenderTargetDesc Ret;
@@ -1088,3 +1113,151 @@ FPooledRenderTargetDesc FRCPassPostProcessBasePassAO::ComputeOutputDesc(EPassOut
 
 	return Ret;
 }
+
+//------------------------------------
+
+
+/** Pixel shader wrapper for the depth-aware directional blur (entry point MainDepthBlur in PostProcessAmbientOcclusion.usf) */
+class FPostProcessAOBlurPS : public FGlobalShader
+{
+	DECLARE_SHADER_TYPE(FPostProcessAOBlurPS, Global);
+
+	static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
+	{
+		return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM4); // only compiled where the SM4 feature level is supported
+	}
+
+	static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
+	{
+		FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment); // base environment only, nothing added
+	}
+
+	/** Default constructor. */
+	FPostProcessAOBlurPS() {}
+
+public:
+	FPostProcessPassParameters PostprocessParameter;
+	FSceneTextureShaderParameters SceneTextureParameters;
+	FScreenSpaceAOParameters ScreenSpaceAOParams;
+	FShaderParameter BlurDirection; // "BlurDirection" in the shader, where only its .xy is read
+
+	/** Initialization constructor: binds the parameters declared above from the compiled shader. */
+	FPostProcessAOBlurPS(const ShaderMetaType::CompiledShaderInitializerType& Initializer)
+		: FGlobalShader(Initializer)
+	{
+		PostprocessParameter.Bind(Initializer.ParameterMap);
+		SceneTextureParameters.Bind(Initializer);
+		ScreenSpaceAOParams.Bind(Initializer.ParameterMap);
+		BlurDirection.Bind(Initializer.ParameterMap, TEXT("BlurDirection"));
+	}
+
+	void SetParameters(const FRenderingCompositePassContext& Context, FIntPoint InputTextureSize, FVector4 BlurDirectionValue) // BlurDirectionValue: (1,0,0,0) or (0,1,0,0) as passed by FRCPassPostProcessAOBlur::Process
+	{
+		const FFinalPostProcessSettings& Settings = Context.View.FinalPostProcessSettings; // NOTE(review): Settings is not used in this function
+		const FPixelShaderRHIParamRef ShaderRHI = GetPixelShader();
+
+		FGlobalShader::SetParameters<FViewUniformShaderParameters>(Context.RHICmdList, ShaderRHI, Context.View.ViewUniformBuffer);
+		PostprocessParameter.SetPS(Context.RHICmdList, ShaderRHI, Context, TStaticSamplerState<SF_Point, AM_Clamp, AM_Clamp, AM_Clamp>::GetRHI()); // inputs are point-sampled with clamped addressing
+		SceneTextureParameters.Set(Context.RHICmdList, ShaderRHI, Context.View.FeatureLevel, ESceneTextureSetupMode::All);
+		ScreenSpaceAOParams.Set(Context.RHICmdList, Context.View, ShaderRHI, InputTextureSize);
+		SetShaderValue(Context.RHICmdList, ShaderRHI, BlurDirection, BlurDirectionValue);
+	}
+
+	// FShader interface.
+	virtual bool Serialize(FArchive& Ar) override
+	{
+		bool bShaderHasOutdatedParameters = FGlobalShader::Serialize(Ar);
+		Ar << BlurDirection << PostprocessParameter << SceneTextureParameters << ScreenSpaceAOParams;
+		return bShaderHasOutdatedParameters;
+	}
+};
+
+IMPLEMENT_SHADER_TYPE(, FPostProcessAOBlurPS, TEXT("/Engine/Private/PostProcessAmbientOcclusion.usf"), TEXT("MainDepthBlur"), SF_Pixel);
+
+FPooledRenderTargetDesc FRCPassPostProcessAOBlur::ComputeOutputDesc(EPassOutputId InPassOutputId) const
+{
+	// Begin with a copy of the descriptor of whatever feeds this pass
+	FPooledRenderTargetDesc OutputDesc = GetInput(ePId_Input0)->GetOutput()->RenderTargetDesc;
+	if (!mIsHorizontal)
+	{
+		// Vertical pass: the input descriptor is passed through, only renamed
+		OutputDesc.DebugName = TEXT("AOBlur Vertical");
+		return OutputDesc;
+	}
+
+	// The horizontal output still carries the packed linear depth in GBA, so it gets a four-channel 8-bit format
+	OutputDesc.Reset();
+	OutputDesc.Format = PF_B8G8R8A8;
+	OutputDesc.DebugName = TEXT("AOBlur Horizontal");
+	return OutputDesc;
+}
+
+void FRCPassPostProcessAOBlur::Process(FRenderingCompositePassContext& Context) // draws one direction (horizontal or vertical) of the depth-aware AO blur
+{
+	const FViewInfo& View = Context.View;
+
+	SCOPED_DRAW_EVENTF(Context.RHICmdList, ApplyAOBlur, TEXT("AO Blur"));
+
+	FSceneRenderTargets& SceneContext = FSceneRenderTargets::Get(Context.RHICmdList);
+
+	const FSceneRenderTargetItem* DestRenderTarget = nullptr;
+
+	if (mIsHorizontal)
+	{
+		DestRenderTarget = &PassOutputs[0].RequestSurface(Context); // horizontal pass renders to this pass's own output surface
+	}
+	else
+	{
+		// Vertical pass renders directly to the final SSAO buffer
+		DestRenderTarget = &SceneContext.ScreenSpaceAO->GetRenderTargetItem();
+	}
+
+	// Set the view family's render target/viewport.
+	Context.RHICmdList.TransitionResource(EResourceTransitionAccess::EWritable, DestRenderTarget->TargetableTexture);
+
+	FIntRect DestRect = FIntRect::DivideAndRoundUp(View.ViewRect, 2); // rounded up like the half-res AO pass, so odd-sized views keep their last row/column
+
+	SetRenderTarget(Context.RHICmdList, DestRenderTarget->TargetableTexture, FTextureRHIParamRef(), ESimpleRenderTargetMode::EExistingColorAndDepth);
+
+	Context.SetViewportAndCallRHI(DestRect); // NOTE(review): assumes the half-size targets can hold the rounded-up rect - confirm for odd buffer sizes
+
+	FGraphicsPipelineStateInitializer GraphicsPSOInit;
+	Context.RHICmdList.ApplyCachedRenderTargets(GraphicsPSOInit);
+
+	// set the state
+	GraphicsPSOInit.BlendState = TStaticBlendState<>::GetRHI();
+
+	GraphicsPSOInit.RasterizerState = TStaticRasterizerState<>::GetRHI();
+	GraphicsPSOInit.DepthStencilState = TStaticDepthStencilState<false, CF_Always>::GetRHI();
+
+	TShaderMapRef<FPostProcessVS> VertexShader(Context.GetShaderMap());
+	TShaderMapRef<FPostProcessAOBlurPS> PixelShader(Context.GetShaderMap());
+
+	GraphicsPSOInit.BoundShaderState.VertexDeclarationRHI = GFilterVertexDeclaration.VertexDeclarationRHI;
+	GraphicsPSOInit.BoundShaderState.VertexShaderRHI = GETSAFERHISHADER_VERTEX(*VertexShader);
+	GraphicsPSOInit.BoundShaderState.PixelShaderRHI = GETSAFERHISHADER_PIXEL(*PixelShader);
+	GraphicsPSOInit.PrimitiveType = PT_TriangleList;
+
+	SetGraphicsPipelineState(Context.RHICmdList, GraphicsPSOInit);
+
+	const FVector4 DirHorizontal(1.0f, 0, 0, 0);
+	const FVector4 DirVertical(0, 1.0f, 0, 0);
+
+	VertexShader->SetParameters(Context);
+	PixelShader->SetParameters(Context, SceneContext.GetBufferSizeXY() / 2, mIsHorizontal ? DirHorizontal : DirVertical);
+
+	DrawPostProcessPass(
+		Context.RHICmdList,
+		0, 0,
+		DestRect.Width(), DestRect.Height(),
+		DestRect.Min.X, DestRect.Min.Y, // source rect is the same half-res rect as the destination
+		DestRect.Width(), DestRect.Height(),
+		DestRect.Size(),
+		SceneContext.GetBufferSizeXY() / 2, // NOTE(review): the raw AO target extent is rounded up, this rounds down; identical only for even buffer sizes
+		*VertexShader,
+		View.StereoPass,
+		Context.HasHmdMesh(),
+		EDRF_UseTriangleOptimization);
+
+	Context.RHICmdList.CopyToResolveTarget(DestRenderTarget->TargetableTexture, DestRenderTarget->ShaderResourceTexture, FResolveParams());
+}
+
diff --git a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.h b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.h
index 96c3fec216e..c3cb39e3d4c 100644
--- a/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.h
+++ b/Engine/Source/Runtime/Renderer/Private/CompositionLighting/PostProcessAmbientOcclusion.h
@@ -44,6 +44,7 @@ public:
 
 	static bool IsBasePassAmbientOcclusionRequired(const FViewInfo& View);
 	static bool IsAmbientOcclusionAsyncCompute(const FViewInfo& View, uint32 AOPassCount);
+	static bool IsAmbientOcclusionHalfRes();
 
 	// @return 0:off, 0..3
 	static uint32 ComputeAmbientOcclusionPassCount(const FViewInfo& View);
@@ -76,7 +77,7 @@ class FRCPassPostProcessAmbientOcclusion : public TRenderingCompositePassBase<4,
 {
 public:
 	// @param bInAOSetupAsInput true:use AO setup as input, false: use GBuffer normal and native z depth
-	FRCPassPostProcessAmbientOcclusion(const FSceneView& View, ESSAOType InAOType, bool bInAOSetupAsInput = true);
+	FRCPassPostProcessAmbientOcclusion(const FSceneView& View, ESSAOType InAOType, bool bInAOSetupAsInput = true, bool bInHalfRes = false);
 
 	// interface FRenderingCompositePass ---------
 	virtual void Process(FRenderingCompositePassContext& Context) override;
@@ -96,6 +97,7 @@ private:
 	
 	const ESSAOType AOType;
 	const bool bAOSetupAsInput;
+	const bool bHalfRes;
 };
 
 // apply the AO to the SceneColor (lightmapped object), extra pas that is not always needed
@@ -107,4 +109,19 @@ public:
 	virtual void Process(FRenderingCompositePassContext& Context) override;
 	virtual void Release() override { delete this; }
 	virtual FPooledRenderTargetDesc ComputeOutputDesc(EPassOutputId InPassOutputId) const override;
+};
+
+// Depth-aware blur along one direction. IsHorizontal true: blur along X into an intermediate target; false: blur along Y into the final SSAO buffer
+class FRCPassPostProcessAOBlur : public TRenderingCompositePassBase<1, 1>
+{
+public:
+	explicit FRCPassPostProcessAOBlur(bool IsHorizontal) : mIsHorizontal(IsHorizontal) {}
+
+	// interface FRenderingCompositePass ---------
+	virtual void Process(FRenderingCompositePassContext& Context) override;
+	virtual void Release() override { delete this; }
+	virtual FPooledRenderTargetDesc ComputeOutputDesc(EPassOutputId InPassOutputId) const override;
+
+private:
+	const bool mIsHorizontal; // fixed at construction, like the flags of FRCPassPostProcessAmbientOcclusion
 };
\ No newline at end of file
diff --git a/Engine/Source/Runtime/Renderer/Private/PostProcess/SceneRenderTargets.cpp b/Engine/Source/Runtime/Renderer/Private/PostProcess/SceneRenderTargets.cpp
index a30bef48ad9..3631a16c2dc 100644
--- a/Engine/Source/Runtime/Renderer/Private/PostProcess/SceneRenderTargets.cpp
+++ b/Engine/Source/Runtime/Renderer/Private/PostProcess/SceneRenderTargets.cpp
@@ -2033,7 +2033,9 @@ void FSceneRenderTargets::AllocateDeferredShadingPathRenderTargets(FRHICommandLi
 	{
 		// Create the screen space ambient occlusion buffer
 		{
-			FPooledRenderTargetDesc Desc(FPooledRenderTargetDesc::Create2DDesc(BufferSize, PF_G8, FClearValueBinding::White, TexCreate_None, TexCreate_RenderTargetable, false));
+			static const auto HalfResSSAOCVar = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.AmbientOcclusion.HalfRes"));
+			uint32 HalfResFactor = HalfResSSAOCVar->GetValueOnRenderThread() ? 2 : 1;
+			FPooledRenderTargetDesc Desc(FPooledRenderTargetDesc::Create2DDesc(BufferSize / HalfResFactor, PF_G8, FClearValueBinding::White, TexCreate_None, TexCreate_RenderTargetable, false));
 			Desc.Flags |= GFastVRamConfig.ScreenSpaceAO;
 
 			if (CurrentFeatureLevel >= ERHIFeatureLevel::SM5)
-- 
2.14.1.windows.1