/***********************************************************************
	This file is a part of SkyBoost project 
		- idea and basic asm implementation by Arisu
		- conversion to c++ and advancing by Alexander Blade
***********************************************************************/

#include "skyboost.h"
#include "address_1_3_10_0.h"
#include "func_1_3_10_0.h"
#include "mem_1_3_10_0.h"

#include <windows.h>
#include <tlhelp32.h>
#include <vector>

#ifdef DEBUG
	#error due to thunk and other stuff generation you can't build in debug
#endif

// #define CHECKCOLL // colliding addresses detection
// #define TESTING   // allows to enable/disable boost on hotkey -- causes crashes if thread ip is on changed addr
// #define ADDRDUMP  // dumps the original bytes of every patched address to addrdump.log instead of validating them

void Error(char *pattern, ...)
{
	char text[1024];
	va_list lst;
	va_start(lst, pattern);
	vsprintf_s(text, pattern, lst);
	va_end(lst);
	OutputDebugStringA(text);
	MessageBoxA(0, text, "SkyBoost critical error", MB_ICONERROR); 
	ExitProcess(0);
}

void Print(char *pattern, ...)
{
	char text[1024];
	va_list lst;
	va_start(lst, pattern);
	vsprintf_s(text, pattern, lst);
	va_end(lst);
	OutputDebugStringA(text);
}

// Addresses of all bytes registered through AddMemRegion(); it is filled only when CHECKCOLL
// is defined and is emptied by ResetBcp() in TESTING builds
std::vector<DWORD> MemAddresses;

// Registers the byte range [addr, addr + size) as patched.
// Does nothing unless CHECKCOLL is defined. With CHECKCOLL, a previously registered byte
// that falls into the new range is reported through Error(); otherwise every byte address
// of the range is appended to MemAddresses.
void AddMemRegion(DWORD addr, DWORD size)
{
#ifdef CHECKCOLL
	const DWORD regionEnd = addr + size;
	DWORD clash = 0;
	for (std::vector<DWORD>::const_iterator known = MemAddresses.begin(); known != MemAddresses.end(); ++known)
	{
		// skip everything that lies outside of the new region
		if (*known < addr || *known >= regionEnd) continue;
		clash = *known;
		break;
	}
	if (clash != 0)
		Error("new memory region[0x%08x..0x%08x] collides with old one 0x%08x", addr, addr + size - 1, clash);
	// remember each byte of the region individually
	for (DWORD offset = 0; offset != size; ++offset)
		MemAddresses.push_back(addr + offset);
#endif
}

#ifdef TESTING

// Backup record of a single byte, stored by WriteData() before it overwrites that byte
// (TESTING builds only)
struct TAddrBcp
{
	DWORD addr; // address of the overwritten byte
	BYTE val;   // value read from that address right before the overwrite
};

// Byte backups in the order WriteData() recorded them; replayed by RestoreBcp() and
// emptied by ResetBcp()
std::vector<TAddrBcp> MemBcp;

// Drops the bookkeeping of the previous patch run: the list of registered addresses
// and the recorded byte backups
void ResetBcp()
{
	MemAddresses.clear();
	MemBcp.clear();
}

void RestoreBcp()
{
	for (DWORD I = 0; I < MemBcp.size(); I++)
	{
		DWORD Protect;
		TAddrBcp bcp = MemBcp.at(I);
		VirtualProtect((void *)bcp.addr, 1, PAGE_EXECUTE_READWRITE, &Protect);
		*(BYTE *)bcp.addr = bcp.val;
		VirtualProtect((void *)bcp.addr, 1, Protect, &Protect);
	}
}

#endif

#ifdef ADDRDUMP

// ADDRDUMP builds: file opened by WriteData() as "addrdump.log"; receives one
// {address, value} entry per byte that is about to be overwritten
FILE *logfile;
// number of entries written so far, WriteData() starts a new log line after every 10th one
int fcount;

#else

// address bounds maintained by BuildCheckMem(); checkmem is indexed by (address - minmem)
DWORD minmem, maxmem;
// table of expected bytes filled by BuildCheckMem() and compared with live memory by WriteData()
BYTE *checkmem;

#endif

// Builds the table of expected original bytes that WriteData() validates against.
//   addrarr - array of {addr, val} entries, one per byte
//   size    - number of entries in addrarr
// After the call checkmem[a - minmem] holds the expected value for every listed address a,
// and 0 for the unlisted addresses between minmem and maxmem.
// Does nothing in ADDRDUMP builds (no validation is performed there).
void BuildCheckMem(TAddr *addrarr, DWORD size)
{
#ifndef ADDRDUMP
	// drop the table of a previous call, TESTING builds re-run the patcher on every toggle
	free(checkmem);
	checkmem = NULL;
	minmem = 0;
	maxmem = 0;
	if (!addrarr || size == 0) return; // no table: WriteData() will skip every patch

	DWORD I;
	// seed the bounds with the first entry; seeding them with 0 would pin minmem at 0
	// and make the table span everything from address 0 up to maxmem
	minmem = maxmem = addrarr[0].addr;
	for (I = 1; I < size; I++)
	{
		if (addrarr[I].addr < minmem) minmem = addrarr[I].addr;
		if (addrarr[I].addr > maxmem) maxmem = addrarr[I].addr;
	}
	// both bounds are valid indices, hence the +1 (a wrap to 0 means the range is unusable)
	DWORD span = maxmem - minmem + 1;
	checkmem = span ? (BYTE *)calloc(span, 1) : NULL;
	if (!checkmem) Error("can't allocate %u bytes for the original code table", span);
	for (I = 0; I < size; I++)
		checkmem[addrarr[I].addr - minmem] = (BYTE)addrarr[I].val;
#endif
}

// Overwrites size bytes at Address with the bytes pointed to by Data.
//   Address - target address inside the current process
//   Data    - source bytes
//   size    - number of bytes to copy
// Normal builds: the write is skipped (with a debug message) when the target range is not
// covered by the table built by BuildCheckMem() or when its current bytes differ from the
// expected ones.
// ADDRDUMP builds: no validation, every byte about to be overwritten is logged instead.
// TESTING builds: every overwritten byte is saved to MemBcp first.
void WriteData(DWORD Address, void *Data, DWORD size)
{
	if (!Data || size == 0) return; // nothing to write
#ifdef ADDRDUMP
	if (!logfile) logfile = fopen("addrdump.log", "w");
#else
	// the whole range has to lie inside of the table, otherwise the lookup below
	// would read outside of the checkmem allocation
	if (!checkmem || Address < minmem || Address > maxmem || size - 1 > maxmem - Address)
	{
		Print("address %x(%d) is out of the checked range, skipping", Address, size);
		return;
	}
	if (memcmp((void *)Address, (void *)&checkmem[Address - minmem], size))
	{
		Print("changed address %x(%d), skipping", Address, size);
		return;
	}
#endif
	DWORD Protect;
	AddMemRegion(Address, size);
	// writing to a page that could not be made writable would raise an access violation
	if (!VirtualProtect((void *)Address, size, PAGE_EXECUTE_READWRITE, &Protect))
	{
		Print("can't unprotect address %x(%d), skipping", Address, size);
		return;
	}
	for (DWORD I = 0; I < size; I++) 
	{
#ifdef TESTING
		TAddrBcp bcp;
		bcp.addr = Address + I;
		bcp.val = *(BYTE *)(Address + I);
		MemBcp.push_back(bcp);
#endif
#ifdef ADDRDUMP
		// report the address of the current byte, same as the log file entry does
		Print("%d 0x%08x=%02x", fcount, Address + I, *(BYTE *)(Address + I));
		if (logfile)
		{
			fprintf(logfile, "{0x%08x, 0x%02x},", Address + I, *(BYTE *)(Address + I));
			fflush(logfile);
		}
		fcount++;
		if (logfile && fcount % 10 == 0) fprintf(logfile, "\n");
#endif
		*(BYTE *)(Address + I) = ((BYTE *)Data)[I];
	}
	VirtualProtect((void *)Address, size, Protect, &Protect);
	// code bytes were modified, make sure no stale instructions are executed
	FlushInstructionCache(GetCurrentProcess(), (void *)Address, size);
}

// Places a 5-byte instruction at AddressFrom: opcode 0xE9 followed by the 32-bit
// displacement that leads to AddressTo. The bytes are written through WriteData().
void WriteJmp(BYTE *AddressFrom, BYTE *AddressTo)
{
	// the displacement is counted from the end of the 5-byte instruction
	const int displacement = AddressTo - AddressFrom - 5;
	BYTE instruction[5];
	instruction[0] = 0xE9;
	memcpy(&instruction[1], &displacement, sizeof(displacement));
	WriteData((DWORD)AddressFrom, instruction, sizeof(instruction));
}

// Places a 5-byte instruction at AddressFrom: opcode 0xE8 followed by the 32-bit
// displacement that leads to AddressTo. The bytes are written through WriteData().
void WriteCall(BYTE *AddressFrom, BYTE *AddressTo)
{
	// the displacement is counted from the end of the 5-byte instruction
	const int displacement = AddressTo - AddressFrom - 5;
	BYTE instruction[5];
	instruction[0] = 0xE8;
	memcpy(&instruction[1], &displacement, sizeof(displacement));
	WriteData((DWORD)AddressFrom, instruction, sizeof(instruction));
}


/*bool CheckCall(DWORD Address, DWORD Func)
{
	bool result = (((int)Address + *(int *)(Address + 1) + 5) == Func);
	return result;
}

DWORD calls, time;
bool callres;

bool getcalls()
{
	calls++;
	DWORD t = GetTickCount();
	if (time < t)
	{
		Print("calls per second %d", calls);
		time = t + 1000;
		callres = (calls < 10000);
		calls = 0;
	}
	return callres;
}

VOIDASMPROC batchrendhook()
{
	_asm{
		push    ebp
		mov     ebp, esp
		sub     esp, 20h
		pushad
		call	getcalls
		cmp     al, 1
		jnz     end
		popad
		push 0x00E84536
		ret

		end:
		mov	esp, ebp
		pop ebp
		mov eax, 1
		ret 0x0C
	}
}*/

// Applies the complete patch set for game version 1.3.10.0.
// The table of expected bytes is built from AddrCheckArr_1_3_10_0 first. After that the
// address tables from addr_1_3_10_0 are processed one by one: the listed locations get a
// call/jump to a replacement from func_1_3_10_0 (WriteCall/WriteJmp) or are overwritten
// with ready-made byte sequences (WriteData and the func_1_3_10_0::Write* helpers).
// NOTE(review): every "sizeof(table) / 4" assumes 4-byte table elements - confirm against
// the declarations in address_1_3_10_0.h
void Process_1_3_10_0()
{
 	BuildCheckMem(AddrCheckArr_1_3_10_0, sizeof(AddrCheckArr_1_3_10_0) / sizeof(AddrCheckArr_1_3_10_0[0]));
	DWORD I;
	for (I = 0; I < sizeof(addr_1_3_10_0::fpustuff0_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::fpustuff0_calls[I], (BYTE *)func_1_3_10_0::fpustuff0);
	for (I = 0; I < sizeof(addr_1_3_10_0::fpustuff1_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::fpustuff1_calls[I], (BYTE *)func_1_3_10_0::fpustuff1);
	for (I = 0; I < sizeof(addr_1_3_10_0::fpustuff2_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::fpustuff2_calls[I], (BYTE *)func_1_3_10_0::fpustuff2);
	for (I = 0; I < sizeof(addr_1_3_10_0::cpustuff0_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::cpustuff0_calls[I], (BYTE *)func_1_3_10_0::cpustuff0); // sub_C979A0
	for (I = 0; I < sizeof(addr_1_3_10_0::cpustuff1_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::cpustuff1_calls[I], (BYTE *)func_1_3_10_0::cpustuff1); // sub_4C1BB0
	for (I = 0; I < sizeof(addr_1_3_10_0::cpustuff2_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::cpustuff2_calls[I], (BYTE *)func_1_3_10_0::cpustuff2); // sub_54BE30
	for (I = 0; I < sizeof(addr_1_3_10_0::fpustuff3_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::fpustuff3_calls[I], (BYTE *)func_1_3_10_0::fpustuff3); // sub_C3B500
	WriteJmp((BYTE *)addr_1_3_10_0::fpustuff3_call_inside, (BYTE *)func_1_3_10_0::sub_43A560);
	
	for (I = 0; I < sizeof(addr_1_3_10_0::fpustuff4_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::fpustuff4_calls[I], (BYTE *)func_1_3_10_0::fpustuff4); // sub_C4B330
	for (I = 0; I < sizeof(addr_1_3_10_0::mult_coords_get_sqrt_calls) / 4; I++) WriteCall((BYTE *)addr_1_3_10_0::mult_coords_get_sqrt_calls[I], (BYTE *)func_1_3_10_0::sub_416440); // sub_416440

	// nops
	for (I = 0; I < sizeof(addr_1_3_10_0::cpustuff_nop5_0) / 4; I++) WriteData(addr_1_3_10_0::cpustuff_nop5_0[I], func_1_3_10_0::nop5_0, 5); // sub_64FE40
	
	// fabs
	for (I = 0; I < sizeof(addr_1_3_10_0::fabs_calls) / 4; I++) WriteData(addr_1_3_10_0::fabs_calls[I], func_1_3_10_0::fabs, func_1_3_10_0::fabs_size);
	// get this
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_calls) / 4; I++)	WriteData(addr_1_3_10_0::get_this_calls[I], func_1_3_10_0::get_this, func_1_3_10_0::get_this_size);
	// get this offset 4
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off4_calls) / 4; I++) WriteData(addr_1_3_10_0::get_this_off4_calls[I], func_1_3_10_0::get_this_off4, func_1_3_10_0::get_this_off4_size);
	WriteData(addr_1_3_10_0::get_this_off4, func_1_3_10_0::get_this_off4_func, func_1_3_10_0::get_this_off4_func_size); // replace the func cuz of rtti ptrs
	// get this offset 8
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off8_calls) / 4; I++) WriteData(addr_1_3_10_0::get_this_off8_calls[I], func_1_3_10_0::get_this_off8, func_1_3_10_0::get_this_off8_size);
	WriteData(addr_1_3_10_0::get_this_off8, func_1_3_10_0::get_this_off8_func, func_1_3_10_0::get_this_off8_func_size); // replace the func cuz of rtti ptrs etc
	// get this offset 3C(60d)
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off3C_calls) / 4; I++) WriteData(addr_1_3_10_0::get_this_off3C_calls[I], func_1_3_10_0::get_this_off3C, func_1_3_10_0::get_this_off3C_size);
	WriteData(addr_1_3_10_0::get_this_off3C, func_1_3_10_0::get_this_off3C_func, func_1_3_10_0::get_this_off3C_func_size); // replace the func cuz of rtti ptrs etc
	// get this offset 94
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off94_calls) / 4; I++) func_1_3_10_0::WriteGetThisOff94(addr_1_3_10_0::get_this_off94_calls[I]);
	WriteData(addr_1_3_10_0::get_this_off94, func_1_3_10_0::get_this_off94_func, func_1_3_10_0::get_this_off94_func_size); // replace the func cuz of rtti ptrs etc
	// get this offset B4
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_offB4_calls) / 4; I++) func_1_3_10_0::WriteGetThisOffB4(addr_1_3_10_0::get_this_offB4_calls[I]);
	WriteData(addr_1_3_10_0::get_this_offB4, func_1_3_10_0::get_this_offB4_func, func_1_3_10_0::get_this_offB4_func_size); // replace the func cuz of rtti ptrs etc
	// get this offset 38
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off38_calls) / 4; I++) func_1_3_10_0::WriteGetThisOff38(addr_1_3_10_0::get_this_off38_calls[I]);
	WriteData(addr_1_3_10_0::get_this_off38, func_1_3_10_0::get_this_off38_func, func_1_3_10_0::get_this_off38_func_size); // replace the func cuz of rtti ptrs etc
	// get this offset 60(96d)
	for (I = 0; I < sizeof(addr_1_3_10_0::get_this_off60_calls) / 4; I++) WriteData(addr_1_3_10_0::get_this_off60_calls[I], func_1_3_10_0::get_this_off60, func_1_3_10_0::get_this_off60_size);
	// set this offset 60(96d)
	for (I = 0; I < sizeof(addr_1_3_10_0::set_this_off60_calls) / 4; I++) WriteData(addr_1_3_10_0::set_this_off60_calls[I], func_1_3_10_0::set_this_off60, func_1_3_10_0::set_this_off60_size);
	// get useless
	for (I = 0; I < sizeof(addr_1_3_10_0::get_useless_calls) / 4; I++) WriteData(addr_1_3_10_0::get_useless_calls[I], func_1_3_10_0::get_useless, func_1_3_10_0::get_useless_size);
	WriteData(addr_1_3_10_0::get_useless, func_1_3_10_0::get_useless_func, func_1_3_10_0::get_useless_func_size); // replace the func cuz of rtti ptrs etc
	// sqrt
	for (I = 0; I < sizeof(addr_1_3_10_0::sqrt_calls) / 4; I++) WriteData(addr_1_3_10_0::sqrt_calls[I], func_1_3_10_0::sqrt, func_1_3_10_0::sqrt_size);
	for (I = 0; I < sizeof(addr_1_3_10_0::sqrt_jumps) / 4; I++) WriteData(addr_1_3_10_0::sqrt_jumps[I], func_1_3_10_0::sqrt_jmped, func_1_3_10_0::sqrt_jmped_size);
	// determine_visiblity
	WriteData(addr_1_3_10_0::determine_visiblity, func_1_3_10_0::determine_visiblity, func_1_3_10_0::determine_visiblity_size);
	// dot_product
	WriteData(addr_1_3_10_0::dot_product, func_1_3_10_0::dot_product, func_1_3_10_0::dot_product_size);
	// dot_product_minus_unknown
	WriteData(addr_1_3_10_0::dot_product_minus_unknown, func_1_3_10_0::dot_product_minus_unknown, func_1_3_10_0::dot_product_minus_unknown_size);
	// get_row_column
	for (I = 0; I < sizeof(addr_1_3_10_0::get_row_column_calls) / 4; I++) func_1_3_10_0::WriteGetRowColumn(addr_1_3_10_0::get_row_column_calls[I]);
	WriteData(addr_1_3_10_0::get_row_column, func_1_3_10_0::get_row_column, func_1_3_10_0::get_row_column_size);
	// get_this_plus_12x
	WriteData(addr_1_3_10_0::get_this_plus_12x, func_1_3_10_0::get_this_plus_12x, func_1_3_10_0::get_this_plus_12x_size);
	WriteData(addr_1_3_10_0::get_this_plus_12x_1, func_1_3_10_0::get_this_plus_12x, func_1_3_10_0::get_this_plus_12x_size);
	// get_this_plus_112x
	WriteData(addr_1_3_10_0::get_this_plus_112x, func_1_3_10_0::get_this_plus_112x, func_1_3_10_0::get_this_plus_112x_size);
	// initialize_object_A, B
	WriteData(addr_1_3_10_0::initialize_object_A, func_1_3_10_0::initialize_object_A, func_1_3_10_0::initialize_object_A_size);
	WriteData(addr_1_3_10_0::initialize_object_B, func_1_3_10_0::initialize_object_B, func_1_3_10_0::initialize_object_B_size);
	// traverse_list
	WriteData(addr_1_3_10_0::traverse_list, func_1_3_10_0::traverse_list, func_1_3_10_0::traverse_list_size);	
	// test_bit_calls
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit1_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit1_calls[I], 0x1);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit2_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit2_calls[I], 0x2);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit6_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit6_calls[I], 0x20);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit11_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit11_calls[I], 0x400);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit12_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit12_calls[I], 0x800);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit15_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit15_calls[I], 0x4000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit16_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit16_calls[I], 0x8000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit18_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit18_calls[I], 0x20000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit21_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit21_calls[I], 0x100000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit23_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit23_calls[I], 0x400000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit27_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit27_calls[I], 0x4000000);
	for (I = 0; I < sizeof(addr_1_3_10_0::test_bit28_calls) / 4; I++) func_1_3_10_0::WriteBitTest(addr_1_3_10_0::test_bit28_calls[I], 0x8000000);
	WriteData(addr_1_3_10_0::test_bit1, func_1_3_10_0::test_bit1, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit2, func_1_3_10_0::test_bit2, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit6, func_1_3_10_0::test_bit6, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit11, func_1_3_10_0::test_bit11, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit12, func_1_3_10_0::test_bit12, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit15, func_1_3_10_0::test_bit15, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit16, func_1_3_10_0::test_bit16, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit18, func_1_3_10_0::test_bit18, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit21, func_1_3_10_0::test_bit21, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit23, func_1_3_10_0::test_bit23, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit27, func_1_3_10_0::test_bit27, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_bit28, func_1_3_10_0::test_bit28, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_not_bit30, func_1_3_10_0::test_not_bit30, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_is_zero, func_1_3_10_0::test_is_zero, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_is_zero_1, func_1_3_10_0::test_is_zero, func_1_3_10_0::bit_test_size);
	WriteData(addr_1_3_10_0::test_is_zero_2, func_1_3_10_0::test_is_zero, func_1_3_10_0::bit_test_size);

	WriteCall((BYTE *)addr_1_3_10_0::cpustuff_unk_0, (BYTE *)func_1_3_10_0::sub_E863E0);
	//WriteJmp((BYTE *)0x00E84530, (BYTE *)batchrendhook); 
	// this one is a plain store, it does not go through WriteData() (no protection change, no validation)
	*(DWORD *)addr_1_3_10_0::not_use_crtfplog_bool = 1;
	WriteData(addr_1_3_10_0::check_this_plus98_jmp_plus_48, func_1_3_10_0::check_this_plus98_jmp_plus_48, func_1_3_10_0::check_this_plus98_jmp_plus_48_size);
}

#ifdef TESTING

// Tells whether the given virtual key is currently reported as held down by GetKeyState()
bool GetKeyPressed(BYTE key)
{
	// GetKeyState returns a SHORT whose top bit is the "down" flag; widening it to int
	// sign-extends that flag into bit 31, which is the bit tested here
	const int keyState = GetKeyState(key);
	return (keyState & 0x80000000) != 0;
}

// Suspends (Stop != FALSE) or resumes (Stop == FALSE) every thread of the current process
// except the calling one.
// Returns TRUE when the thread snapshot could be taken, FALSE otherwise. Threads that can't
// be opened are silently skipped.
BOOL StopProcess(BOOL Stop)
{
	const DWORD ProcessId = GetCurrentProcessId();
	const DWORD SelfThreadId = GetCurrentThreadId();

	HANDLE Snap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
	if (Snap == INVALID_HANDLE_VALUE) return FALSE;

	THREADENTRY32 Thread;
	Thread.dwSize = sizeof(THREADENTRY32);
	if (Thread32First(Snap, &Thread))
	{
		do
		{
			// the snapshot lists the threads of all processes; the caller must keep running
			if (Thread.th32OwnerProcessID != ProcessId || Thread.th32ThreadID == SelfThreadId) continue;
			HANDLE ThrHandle = OpenThread(THREAD_SUSPEND_RESUME, FALSE, Thread.th32ThreadID);
			if (!ThrHandle) continue;
			if (Stop)
				SuspendThread(ThrHandle);
			else
				ResumeThread(ThrHandle);
			// every handle returned by OpenThread has to be closed, otherwise each
			// suspend/resume cycle leaks one handle per thread
			CloseHandle(ThrHandle);
		} while (Thread32Next(Snap, &Thread));
	}
	CloseHandle(Snap);
	return TRUE;
}


// TESTING builds only: endless hotkey watcher. Every time SHIFT+F4 is seen pressed the
// other threads are suspended, the boost is switched to the opposite state (patches
// applied or recorded bytes restored) and the threads are resumed again.
void Thread()
{
	Print("THREAD STARTED");
	bool boostActive = false;
	for (;;)
	{
		const bool hotkeyDown = GetKeyPressed(VK_SHIFT) && GetKeyPressed(VK_F4);
		if (!hotkeyDown)
		{
			// nothing to do, poll again a bit later
			Sleep(100);
			continue;
		}

		StopProcess(TRUE);
		MessageBeep(0);
		if (boostActive)
		{
			// turn off
			Print("TURNING OFF");
			RestoreBcp();
			Print("SUCCEED");
		}
		else
		{
			// turn on
			Sleep(300);
			MessageBeep(0);
			Print("TURNING ON");
			ResetBcp();
			Process_1_3_10_0();
			Print("SUCCEED");
		}
		boostActive = !boostActive;
		Sleep(1000);
		MessageBeep(0);
		StopProcess(FALSE);
		Sleep(2000);
		Sleep(100);
	}
}

#endif

// DLL entry point, all the work is done on process attach.
// TESTING builds start the hotkey watcher thread. Normal builds check for SSE2 support,
// identify the game version by the DWORD stored at 0x00DDDC00 and apply the patches when
// the version is supported; otherwise Error() terminates the process.
// Always returns TRUE.
BOOL APIENTRY DllMain(HANDLE hModule, DWORD fdwReason, LPVOID lpReserved)
{
	// detach and thread notifications need no handling
	if (fdwReason != DLL_PROCESS_ATTACH)
		return TRUE;

#ifdef TESTING
	CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)Thread, NULL, 0, NULL);
#else
	Print("checking game version");	
	const BOOL hasSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE);
	if (!hasSSE2)
		Error("Sorry, but your CPU doesn't support SSE2 instruction set, SkyBoost will not work");

	const DWORD versionMark = *(DWORD *)0x00DDDC00;
	if (versionMark == 0xE04589FF) // 1.3.10.0 (signed on 18 Dec 2011)
	{
		Print("current (1.3.10.0) game version is supported");
		Process_1_3_10_0();
	}
	else
	{
		Error("Your game version is NOT supported, supported patches: 1.3.10.0us");
	}
#endif

	return TRUE;
}