diff --git a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp index 1781969e69..108944b901 100644 --- a/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/FragmentProgramDecompiler.cpp @@ -404,7 +404,7 @@ bool FragmentProgramDecompiler::handle_scb(u32 opcode) case RSX_FP_OPCODE_EX2: SetDst("exp2($0.xxxx)"); return true; case RSX_FP_OPCODE_FLR: SetDst("floor($0)"); return true; case RSX_FP_OPCODE_FRC: SetDst(getFunction(FUNCTION::FUNCTION_FRACT)); return true; - case RSX_FP_OPCODE_LIT: SetDst(getFloatTypeName(4) + "(1.0, $0.x, ($0.x > 0.0 ? exp($0.w * log2($0.y)) : 0.0), 1.0)"); return true; + case RSX_FP_OPCODE_LIT: SetDst("lit_legacy($0)"); return true; case RSX_FP_OPCODE_LIF: SetDst(getFloatTypeName(4) + "(1.0, $0.y, ($0.y > 0 ? pow(2.0, $0.w) : 0.0), 1.0)"); return true; case RSX_FP_OPCODE_LRP: LOG_ERROR(RSX, "Unimplemented SCB instruction: LRP"); return true; // TODO: Is this in the right category? case RSX_FP_OPCODE_LG2: SetDst("log2($0.xxxx)"); return true; diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 02d510b361..4179abecf6 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -104,7 +104,9 @@ size_t fragment_program_hash::operator()(const RSXFragmentProgram& program) cons bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, const RSXFragmentProgram& binary2) const { if (binary1.texture_dimensions != binary2.texture_dimensions || binary1.unnormalized_coords != binary2.unnormalized_coords || - binary1.height != binary2.height || binary1.origin_mode != binary2.origin_mode || binary1.pixel_center_mode != binary2.pixel_center_mode) + binary1.height != binary2.height || binary1.origin_mode != binary2.origin_mode || binary1.pixel_center_mode != binary2.pixel_center_mode || + binary1.back_color_diffuse_output != binary2.back_color_diffuse_output || binary1.back_color_specular_output != binary2.back_color_specular_output || + binary1.front_back_color_enabled != binary2.front_back_color_enabled) return false; const qword *instBuffer1 = (const qword*)binary1.addr; const qword *instBuffer2 = (const qword*)binary2.addr; diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index 0ca3cfd5d8..3b0e156868 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -562,10 +562,10 @@ std::string VertexProgramDecompiler::Decompile() case RSX_SCA_OPCODE_MOV: SetDSTSca("$s"); break; case RSX_SCA_OPCODE_RCP: SetDSTSca("(1.0 / $s)"); break; case RSX_SCA_OPCODE_RCC: SetDSTSca("clamp(1.0 / $s, 5.42101e-20, 1.884467e19)"); break; - case RSX_SCA_OPCODE_RSQ: SetDSTSca("(1.f / sqrt($s))"); break; + case RSX_SCA_OPCODE_RSQ: SetDSTSca("rsq_legacy($s)"); break; case RSX_SCA_OPCODE_EXP: SetDSTSca("exp($s)"); break; case RSX_SCA_OPCODE_LOG: SetDSTSca("log($s)"); break; - case RSX_SCA_OPCODE_LIT: SetDSTSca(getFloatTypeName(4) + "(1.0, $s.x, ($s.x > 0.0 ? exp($s.w * log2($s.y)) : 0.0), 1.0)"); break; + case RSX_SCA_OPCODE_LIT: SetDSTSca("lit_legacy($s)"); break; case RSX_SCA_OPCODE_BRA: { AddCode("$if ($cond)"); @@ -619,7 +619,7 @@ std::string VertexProgramDecompiler::Decompile() // works like BRI but shorter (RET o[1].x(TR);) AddCode("$ifcond return;"); break; - case RSX_SCA_OPCODE_LG2: SetDSTSca("log2($s)"); break; + case RSX_SCA_OPCODE_LG2: SetDSTSca("log2_legacy($s)"); break; case RSX_SCA_OPCODE_EX2: SetDSTSca("exp2($s)"); break; case RSX_SCA_OPCODE_SIN: SetDSTSca("sin($s)"); break; case RSX_SCA_OPCODE_COS: SetDSTSca("cos($s)"); break; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp index 2063a83a74..d5721cb1ed 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.cpp @@ -93,4 +93,40 @@ std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::str return "(" + Op0 + " != " + Op1 + ")"; } } + +void insert_d3d12_legacy_function(std::ostream& OS) +{ + OS << "float4 divsq_legacy(float4 num, float4 denum)\n"; + OS << "{\n"; + OS << " return num / sqrt(max(denum.xxxx, 1.E-10));\n"; + OS << "}\n"; + + OS << "float4 rcp_legacy(float4 denum)\n"; + OS << "{\n"; + OS << " return 1. / denum;\n"; + OS << "}\n"; + + OS << "float4 rsq_legacy(float4 val)\n"; + OS << "{\n"; + OS << " return float(1.0 / sqrt(max(val.x, 1.E-10))).xxxx;\n"; + OS << "}\n\n"; + + OS << "float4 log2_legacy(float4 val)\n"; + OS << "{\n"; + OS << " return log2(max(val.x, 1.E-10)).xxxx;\n"; + OS << "}\n\n"; + + OS << "float4 lit_legacy(float4 val)"; + OS << "{\n"; + OS << " float4 clamped_val = val;\n"; + OS << " clamped_val.x = max(val.x, 0);\n"; + OS << " clamped_val.y = max(val.y, 0);\n"; + OS << " float4 result;\n"; + OS << " result.x = 1.0;\n"; + OS << " result.w = 1.;\n"; + OS << " result.y = clamped_val.x;\n"; + OS << " result.z = clamped_val.x > 0.0 ? exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.0;\n"; + OS << " return result;\n"; + OS << "}\n\n"; +} #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h index 5aeaba5325..7f088418e4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12CommonDecompiler.h @@ -5,3 +5,5 @@ std::string getFloatTypeNameImp(size_t elementCount); std::string getFunctionImp(FUNCTION f); std::string compareFunctionImp(COMPARE f, const std::string &Op0, const std::string &Op1); + +void insert_d3d12_legacy_function(std::ostream&); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index f305fc6cd7..9884fffe74 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -143,35 +143,34 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) void D3D12FragmentDecompiler::insertMainStart(std::stringstream & OS) { - // "lib" function - // 0.00001 is used as "some non zero very little number" - OS << "float4 divsq_legacy(float4 num, float4 denum)\n"; - OS << "{\n"; - OS << " return num / sqrt(max(denum.xxxx, 0.00001));\n"; - OS << "}\n"; - - OS << "float4 rcp_legacy(float4 denum)\n"; - OS << "{\n"; - OS << " return 1. / denum;\n"; - OS << "}\n"; - - OS << "float4 rsq_legacy(float4 denum)\n"; - OS << "{\n"; - OS << " return 1. / sqrt(max(denum, 0.00001));\n"; - OS << "}\n"; - + insert_d3d12_legacy_function(OS); const std::set output_value = { "r0", "r1", "r2", "r3", "r4", "h0", "h2", "h4", "h6", "h8" }; - OS << "void ps_impl(PixelInput In, inout float4 r0, inout float4 h0, inout float4 r1, inout float4 h2, inout float4 r2, inout float4 h4, inout float4 r3, inout float4 h6, inout float4 r4, inout float4 h8)" << std::endl; + OS << "void ps_impl(bool is_front_face, PixelInput In, inout float4 r0, inout float4 h0, inout float4 r1, inout float4 h2, inout float4 r2, inout float4 h4, inout float4 r3, inout float4 h6, inout float4 r4, inout float4 h8)" << std::endl; OS << "{" << std::endl; for (const ParamType &PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) + { + if (m_prog.front_back_color_enabled) + { + if (PI.name == "spec_color" && m_prog.back_color_specular_output) + { + OS << " float4 spec_color = is_front_face ? In.dst_reg4 : In.spec_color;\n"; + continue; + } + if (PI.name == "diff_color" && m_prog.back_color_diffuse_output) + { + OS << " float4 diff_color = is_front_face ? In.dst_reg3 : In.diff_color;\n"; + continue; + } + } OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; + } } // A bit unclean, but works. OS << " " << "float4 gl_Position = In.Position;" << std::endl; @@ -210,7 +209,7 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) { OS << "}" << std::endl; OS << std::endl; - OS << "PixelOutput main(PixelInput In)" << std::endl; + OS << "PixelOutput main(PixelInput In, bool is_front_face : SV_IsFrontFace)" << std::endl; OS << "{" << std::endl; OS << " float4 r0 = float4(0., 0., 0., 0.);" << std::endl; OS << " float4 r1 = float4(0., 0., 0., 0.);" << std::endl; @@ -222,7 +221,7 @@ void D3D12FragmentDecompiler::insertMainEnd(std::stringstream & OS) OS << " float4 h4 = float4(0., 0., 0., 0.);" << std::endl; OS << " float4 h6 = float4(0., 0., 0., 0.);" << std::endl; OS << " float4 h8 = float4(0., 0., 0., 0.);" << std::endl; - OS << " ps_impl(In, r0, h0, r1, h2, r2, h4, r3, h6, r4, h8);" << std::endl; + OS << " ps_impl(is_front_face, In, r0, h0, r1, h2, r2, h4, r3, h6, r4, h8);" << std::endl; const std::pair table[] = { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 2121728680..d81b69ede9 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -303,9 +303,24 @@ void D3D12GSRender::end() get_current_resource_storage().command_list->SetPipelineState(std::get<0>(m_current_pso).Get()); std::chrono::time_point texture_duration_start = std::chrono::system_clock::now(); - if (std::get<2>(m_current_pso) > 0) + size_t texture_count = std::get<2>(m_current_pso); + if (texture_count > 0) { - upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3 + vertex_buffer_count, std::get<2>(m_current_pso) > 0); + upload_and_bind_textures(get_current_resource_storage().command_list.Get(), texture_count); + + for (unsigned i = 0; i < texture_count; i++) + { + ID3D12Resource *tex_resource; + D3D12_SHADER_RESOURCE_VIEW_DESC srv; + std::tie(tex_resource, srv) = m_current_shader_resources[i]; + m_device->CreateShaderResourceView(tex_resource, &srv, + CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)currentDescriptorIndex + 3 + (INT)vertex_buffer_count + (INT)i, m_descriptor_stride_srv_cbv_uav) + ); + m_device->CreateSampler(&m_current_samplers[i], + CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().sampler_descriptor_heap[get_current_resource_storage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart()) + .Offset((UINT)get_current_resource_storage().current_sampler_index + (UINT)i, m_descriptor_stride_samplers)); + } get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 1a4f6d4a40..b290478bc6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -127,6 +127,9 @@ private: // Used to fill unused texture slot ID3D12Resource *m_dummy_texture; + // Currently used shader resources / samplers descriptor + std::array, 16> m_current_shader_resources = {}; + std::array m_current_samplers = {}; public: D3D12GSRender(); virtual ~D3D12GSRender(); @@ -165,7 +168,7 @@ private: * Create necessary resource view/sampler descriptors in the per frame storage struct. * If the count of enabled texture is below texture_count, fills with dummy texture and sampler. */ - void upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count); + void upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t texture_count); /** * Creates render target if necessary. diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index b8950bd8b5..1884a2a59a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -195,7 +195,7 @@ void D3D12GSRender::load_program() D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, }; prop.Rasterization = CD3D12_RASTERIZER_DESC; - if (rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE]) + if (!!rsx::method_registers[NV4097_SET_CULL_FACE_ENABLE]) { switch (rsx::method_registers[NV4097_SET_CULL_FACE]) { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index e24b3e1583..d5cf7caa24 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -171,12 +171,14 @@ D3D12_SHADER_RESOURCE_VIEW_DESC get_srv_descriptor_with_dimensions(const rsx::te } } -void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t descriptor_index, size_t texture_count) +void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_list, size_t texture_count) { - size_t used_texture = 0; - - for (u32 i = 0; i < rsx::limits::textures_count; ++i) + for (u32 i = 0; i < 16; ++i) { + if (!m_textures_dirty[i]) + continue; + m_textures_dirty[i] = false; + if (!textures[i].enabled()) { // Now fill remaining texture slots with dummy texture/sampler @@ -190,21 +192,14 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_ D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0, D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0); - m_device->CreateShaderResourceView(m_dummy_texture, &shader_resource_view_desc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)descriptor_index + (INT)used_texture, m_descriptor_stride_srv_cbv_uav) - ); + m_current_shader_resources[i] = std::make_tuple(m_dummy_texture, shader_resource_view_desc); D3D12_SAMPLER_DESC sampler_desc = {}; sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - m_device->CreateSampler(&sampler_desc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().sampler_descriptor_heap[get_current_resource_storage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart()) - .Offset((INT)get_current_resource_storage().current_sampler_index + (INT)used_texture, m_descriptor_stride_samplers) - ); - used_texture++; + m_current_samplers[i] = sampler_desc; continue; } size_t w = textures[i].width(), h = textures[i].height(); @@ -357,9 +352,7 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_ break; } - m_device->CreateShaderResourceView(vram_texture, &shared_resource_view_desc, - CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) - .Offset((UINT)descriptor_index + (UINT)used_texture, m_descriptor_stride_srv_cbv_uav)); + m_current_shader_resources[i] = std::make_tuple(vram_texture, shared_resource_view_desc); if (get_current_resource_storage().current_sampler_index + 16 > 2048) { @@ -373,13 +366,7 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_ }; command_list->SetDescriptorHeaps(2, descriptors); } - m_device->CreateSampler(&get_sampler_desc(textures[i]), - CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().sampler_descriptor_heap[get_current_resource_storage().sampler_descriptors_heap_index]->GetCPUDescriptorHandleForHeapStart()) - .Offset((UINT)get_current_resource_storage().current_sampler_index + (UINT)used_texture, m_descriptor_stride_samplers)); - - used_texture++; + m_current_samplers[i] = get_sampler_desc(textures[i]); } - - } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index f8f1d375be..53d44fbed7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -176,6 +176,8 @@ namespace void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { + insert_d3d12_legacy_function(OS); + OS << "PixelInput main(uint vertex_id : SV_VertexID)" << std::endl; OS << "{" << std::endl; diff --git a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp index b88d544c9f..e28942aff1 100644 --- a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.cpp @@ -84,3 +84,39 @@ std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::st } throw EXCEPTION("Unknow compare function"); } + +void insert_glsl_legacy_function(std::ostream& OS) +{ + OS << "vec4 divsq_legacy(vec4 num, vec4 denum)\n"; + OS << "{\n"; + OS << " return num / sqrt(max(denum.xxxx, 1.E-10));\n"; + OS << "}\n"; + + OS << "vec4 rcp_legacy(vec4 denum)\n"; + OS << "{\n"; + OS << " return 1. / denum;\n"; + OS << "}\n"; + + OS << "vec4 rsq_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return float(1.0 / sqrt(max(val.x, 1.E-10))).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 log2_legacy(vec4 val)\n"; + OS << "{\n"; + OS << " return log2(max(val.x, 1.E-10)).xxxx;\n"; + OS << "}\n\n"; + + OS << "vec4 lit_legacy(vec4 val)"; + OS << "{\n"; + OS << " vec4 clamped_val = val;\n"; + OS << " clamped_val.x = max(val.x, 0);\n"; + OS << " clamped_val.y = max(val.y, 0);\n"; + OS << " vec4 result;\n"; + OS << " result.x = 1.0;\n"; + OS << " result.w = 1.;\n"; + OS << " result.y = clamped_val.x;\n"; + OS << " result.z = clamped_val.x > 0.0 ? exp(clamped_val.w * log(max(clamped_val.y, 1.E-10))) : 0.0;\n"; + OS << " return result;\n"; + OS << "}\n\n"; +} diff --git a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h index cee0d9b11c..6d4c11473a 100644 --- a/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h +++ b/rpcs3/Emu/RSX/GL/GLCommonDecompiler.h @@ -4,3 +4,4 @@ std::string getFloatTypeNameImpl(size_t elementCount); std::string getFunctionImpl(FUNCTION f); std::string compareFunctionImpl(COMPARE f, const std::string &Op0, const std::string &Op1); +void insert_glsl_legacy_function(std::ostream& OS); \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index e05cbb1b32..3ef0038315 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -101,22 +101,7 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { - // "lib" function - // 0.00001 is used as "some non zero very little number" - OS << "vec4 divsq_legacy(vec4 num, vec4 denum)\n"; - OS << "{\n"; - OS << " return num / sqrt(max(denum.xxxx, 0.00001));\n"; - OS << "}\n"; - - OS << "vec4 rcp_legacy(vec4 denum)\n"; - OS << "{\n"; - OS << " return 1. / denum;\n"; - OS << "}\n"; - - OS << "vec4 rsq_legacy(vec4 denum)\n"; - OS << "{\n"; - OS << " return 1. / sqrt(max(denum, 0.00001));\n"; - OS << "}\n"; + insert_glsl_legacy_function(OS); OS << "void main ()" << std::endl; OS << "{" << std::endl; diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index c448f48091..b5e3b31f5f 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -158,6 +158,8 @@ void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector> 12) & 0xF); result.pixel_center_mode = rsx::to_window_pixel_center((shader_window >> 16) & 0xF); @@ -760,6 +765,7 @@ namespace rsx method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] = 0xffffffff; method_registers[NV4097_SET_CONTEXT_DMA_REPORT] = CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_REPORT; + rsx::method_registers[NV4097_SET_TWO_SIDE_LIGHT_EN] = true; // Reset vertex attrib array for (int i = 0; i < limits::vertex_count; i++) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f6cc2902fc..6904bbc07d 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -289,6 +289,7 @@ namespace rsx std::vector inline_vertex_array; bool m_rtts_dirty; + bool m_textures_dirty[16]; protected: std::array get_color_surface_addresses() const; u32 get_zeta_surface_address() const; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index cd645fe8a6..cb7d42d845 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -287,6 +287,15 @@ namespace rsx { rsx->m_rtts_dirty = true; } + + template + struct set_texture_dirty_bit + { + force_inline static void impl(thread* rsx, u32 arg) + { + rsx->m_textures_dirty[index] = true; + } + }; } namespace nv308a @@ -834,6 +843,16 @@ namespace rsx bind(); bind(); bind(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); //NV308A bind_range();