/* 
  Copyright (C) 2008 Kai Hertel

	This file is part of mmpong.

	mmpong is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	mmpong is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with mmpong.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <pthread.h>
#include <unistd.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "thread.h"
#include "addrtable.h"
#include "lib/game.h"
#include "lib/message.h"


// Teams with at most this many members are filled deterministically (smallest first) by choose_team()
#define SMALL_TEAM 10
// Smallest select() timeout (in microseconds) produced by compensate_timer();
// a remaining timeout at or below this value is treated as "broadcast is due" in worker_thread()
#define NOWAIT_INTERVAL 10

// Per-client flag bits kept in thread_state.flags:
// FLAG_REFRESH_STATE - the next broadcast to this client is a full state message
// FLAG_ALLOW_RTTM    - the client's next RTTM probe (out-of-range position) will be echoed back
#define FLAG_REFRESH_STATE 	0x01
#define FLAG_ALLOW_RTTM 	0x02

// Bookkeeping for all clients served by one worker thread.
// Each per-client array holds thread_connects entries; entry i of every array describes the same client.
struct thread_state {
	int *sockets; 	// client socket descriptors
	in_addr_t *peers; 	// client addresses; only maintained while insert_peer is set, hence not in the member table below
	struct netmessage_buffer **netrecvbuf, **netsendbuf; 	// per-client receive / send buffers
	float *positions, *diffs; 	// last accepted pad position and its squared deviation from the team mean
	char *flags; 	// FLAG_* bits
	short *teams; 	// team index of each client
	long *team_count; 	// clients of this thread per team (n_teams entries, NOT a per-client array)
	unsigned thread_connects; 	// number of clients currently tracked
};

// helps get the realloc/memmove hell under control (consider unifying some of these macros with the ones in lib/message.c)
// All pointer arithmetic is done on char * so that it is defined by ISO C (arithmetic on void * is a GNU extension).

// OFFSET: distance in bytes between the lvalues `base` and `member` (both must live in the same object)
#define OFFSET(base, member) ((int)( ((char *)&(member)) - ((char *)&(base)) ))
// INSTANCE: the pointer member located `offset` bytes into the struct `var` points to, as a void * lvalue
#define INSTANCE(var, offset) (*((void **)( ((char *)(var)) + (offset) )))
// REGISTER: table entry { byte offset of the array pointer, size of one array element }
#define REGISTER(member) { (int)offsetof(struct thread_state, member), (int)sizeof(*((struct thread_state *)0)->member) }

// Table of all per-client arrays that are grown, shrunk and freed generically; terminated by a zero membsize.
static const struct {
	const int offset;
	const int membsize;
} thread_state_members[]= {
	REGISTER(sockets),
	REGISTER(netrecvbuf),
	REGISTER(netsendbuf),
	REGISTER(positions),
	REGISTER(diffs),
	REGISTER(flags),
	REGISTER(teams),
	{ 0, 0 }
};

// communicate with master thread
// (all of these are defined outside this file; the remarks describe only how this file uses them)
extern short volatile signal_exiting; 	// non-zero ends the main loop of worker_thread()
extern unsigned volatile bcast_interval; 	// broadcast period, handed to compensate_timer() (treated as microseconds there)
extern unsigned volatile rttm_min; 	// minimum spacing of answered RTTM probes, handed to rttm_timer() (treated as milliseconds there); 0 = no limit
extern struct gameplay_public volatile game; 	// shared game state
extern pthread_rwlock_t gsync; 	// taken in read mode by worker_thread() while reading game
extern unsigned *n_connects; 	// client count per (thread, team), index: tlidx * n_teams + team
extern int * volatile insert_socket; 	// per-thread hand-over slot for a new client socket (0 = empty)
extern in_addr_t * volatile insert_peer; 	// per-thread hand-over slot for the new client's address; NULL disables peer tracking here
extern pthread_mutex_t *isync; 	// per-thread mutex held while a hand-over slot is emptied
extern float *sums; 	// sum of pad positions per (thread, team), same indexing as n_connects
extern float *variances; 	// sum of squared deviations per (thread, team), same indexing as n_connects
extern pthread_rwlock_t statsync; 	// taken in read mode here around updates of n_connects / sums / variances
extern short volatile n_teams; 	// number of teams
extern short volatile verbosity; 	// non-zero: report added / removed clients; >2: also trace peer counts



// client bookkeeping
static int add_client(const int tlidx, int newsock, in_addr_t newpeer, struct thread_state *tstate);
static int remove_client(const int tlidx, const int sockidx, struct thread_state *tstate);
static short choose_team(const short n_teams, const long *members);
// timers
static inline int compensate_timer(const unsigned interval, const struct timeval *sync, struct timeval *period);
static inline int rttm_timer(const unsigned interval, const struct timeval *sync, struct timeval *last);
// outgoing game state messages
static int send_status_full(const int sock, struct netmessage_buffer *netsendbuf, const struct gameplay_public *game, const short team);
static int send_status_update(const int sock, struct netmessage_buffer *netsendbuf, const struct gameplay_public *game);
// callback for addrtable_atomic()
static int peer_cnt_decrease(unsigned *cnt);


// Main loop of one worker thread.
//
// _tlidx points at this thread's element of insert_socket[]; the element's index (tlidx)
// selects the thread's hand-over slots (insert_socket / insert_peer / isync) and its rows
// in n_connects / sums / variances.
//
// Until signal_exiting is set, every iteration
//   1. adopts a client socket handed over through insert_socket[tlidx],
//   2. waits in select() for client traffic or for the broadcast timeout,
//   3. flushes pending send buffers of writable sockets,
//   4. broadcasts the game state to all clients once the timeout has run out,
//   5. processes client messages (exit, RTTM probe, pad position).
// On exit every client is sent NETMSG_KICK, the thread's statistics rows are zeroed and
// all memory is released. Always returns 0.
int worker_thread(const void *_tlidx)
{
	const int tlidx= (int)((const int *)_tlidx - insert_socket);
	struct thread_state tstate= {
		.sockets= NULL, .peers= NULL,
		.positions= NULL, .diffs= NULL,
		.flags= NULL, .teams= NULL,
		.netsendbuf= NULL, .netrecvbuf= NULL,
		.team_count= calloc(n_teams, sizeof(long)),
		.thread_connects= 0
	};
	unsigned *scoresense= calloc(n_teams, sizeof(*scoresense)); 	// score per team as of the last comparison
	assert(tstate.team_count && scoresense);
	// NOTE(review): the per-client arrays are still empty at this point, so a non-zero count
	// inherited here would make the loops below index NULL arrays -- confirm that n_connects
	// is always zero for a starting thread (the clean-up at the end of this function zeroes it).
	for (int idx= 0; idx< n_teams; idx++) 	// resume where we might have left off
		tstate.thread_connects+= n_connects[tlidx * n_teams + idx];

	struct timeval ratesync, lastrttm;
	gettimeofday(&ratesync, NULL);
	gettimeofday(&lastrttm, NULL);
	// The select() timeout has to survive from one iteration to the next (it is only
	// recomputed when ratereset is set), so it must not be declared inside the loop.
	struct timeval period= { 0, 0 };
	// NOTE(review): gamerefresh is never cleared anywhere in this function, so every broadcast
	// flags all clients for a full state message -- confirm whether that is intended.
	short gamerefresh= 1;
	int flushstart= 0, ratereset= 1;

	while (!signal_exiting) {
		// check for queued up connections
		if (insert_socket[tlidx]) {
			int hold= (-1);
			in_addr_t peer= 0;
			// hand over
			if (pthread_mutex_lock(isync+tlidx))
				fprintf(stderr, "Worker Thread [%d]: Cannot acquire mutex lock.\n", tlidx);
			else {
				hold= insert_socket[tlidx];
				if (insert_peer) peer= insert_peer[tlidx];
				insert_socket[tlidx]= 0;
				if (pthread_mutex_unlock(isync+tlidx))
					fprintf(stderr, "Worker Thread [%d]: Cannot release mutex lock.\n", tlidx);
			}
			// accept
			if (hold != (-1))
				add_client(tlidx, hold, peer, &tstate); 	// the initial game state is flagged to be transmitted here
		}

		// watch every client for input, and those with unsent data for writability
		fd_set rfds, wfds;
		FD_ZERO(&rfds);
		FD_ZERO(&wfds);
		int nfds= 0;
		for (int idx= 0; idx< tstate.thread_connects; idx++) {
			const int fd= tstate.sockets[idx];
			FD_SET(fd, &rfds);
			if (nfds <= fd) nfds= fd +1;
			if (tstate.netsendbuf[idx]->pos < tstate.netsendbuf[idx]->len)
				FD_SET(fd, &wfds);
		}

		// set bcast timer
		if (ratereset) {
			gettimeofday(&period, NULL);
			compensate_timer(bcast_interval, &ratesync, &period);
			ratereset= 0;
		}

		// The broadcast check further down expects select() to leave the unslept time in
		// `period` (presumably relying on Linux behaviour -- verify before porting).
		int retselect= select(nfds, &rfds, &wfds, NULL, &period);
		if (retselect == (-1))
			perror("Thread: select()"); 	// report before another call can overwrite errno
		gettimeofday(&ratesync, NULL); 	// used to sync with desired notifier rate
		if (retselect == (-1))
			continue;

		if (rttm_timer(rttm_min, &ratesync, &lastrttm)) {
			for (int idx= 0; idx< tstate.thread_connects; idx++)
				tstate.flags[idx]|= FLAG_ALLOW_RTTM;
		}

		// flush out pending messages when client sockets become available
		short setflush= 0;
		for (int cnt= 0; cnt< tstate.thread_connects; cnt++) {
			int idx= (flushstart + cnt) % tstate.thread_connects;
			if (!FD_ISSET(tstate.sockets[idx], &wfds))
				continue;
			if ( (netmessage_buffer_flush(tstate.sockets[idx], tstate.netsendbuf[idx]) == NETMSG_FAIL_DELIVER)
				&& !setflush ) {
				flushstart= idx; 	// round-robin: start with the first stalled client next time
				setflush= 1;
			}
		}

		// inform connected clients of the current game status periodically
		if ((1000L * 1000L) * (long)period.tv_sec + period.tv_usec <= NOWAIT_INTERVAL) {
			ratereset= 1;
			if (pthread_rwlock_rdlock(&gsync)) {
				fprintf(stderr, "Worker Thread [%d]: Cannot acquire read lock.\n", tlidx);
			}
			else {
				// work on a private snapshot so the lock is held as briefly as possible
				struct gameplay_public gamelocal;
				memcpy(&gamelocal, (void *)&game, sizeof(gamelocal));
				if (pthread_rwlock_unlock(&gsync))
					fprintf(stderr, "Worker Thread [%d]: Cannot release read lock.\n", tlidx);
				// broadcast update messages
				if (!gamerefresh) gamerefresh= (gamelocal.status != gamestatus_running);
				if (!gamerefresh) { 	// determine whether full state messages need to be sent
					for (int idx= 0; idx< n_teams; idx++)
						if (scoresense[idx] != gamelocal.pad_attr[idx].score) {
							gamerefresh= 1;
							scoresense[idx]= gamelocal.pad_attr[idx].score;
						}
				}
				if (gamerefresh)
					for (int idx= 0; idx< tstate.thread_connects; idx++)
						tstate.flags[idx]|= FLAG_REFRESH_STATE;
				for (int idx= 0; idx< tstate.thread_connects; idx++) {
					int sendcode;
					if (tstate.flags[idx] & FLAG_REFRESH_STATE) {
						// keep sending full state messages until the next game starts
						if (gamelocal.status == gamestatus_running)
							tstate.flags[idx]&= ~FLAG_REFRESH_STATE;
						sendcode= send_status_full(tstate.sockets[idx], tstate.netsendbuf[idx],
							&gamelocal, tstate.teams[idx]);
						if ( (sendcode != NETMSG_SUCCESS) && (sendcode != NETMSG_PARTIAL) )
							fprintf(stderr, "Worker Thread [%d]: Cannot send status update.\n", tlidx);
					}
					else {
						// incremental update
						sendcode= send_status_update(tstate.sockets[idx], tstate.netsendbuf[idx], &gamelocal);
						if ( (sendcode != NETMSG_SUCCESS) && (sendcode != NETMSG_PARTIAL) )
							fprintf(stderr, "Worker Thread [%d]: Cannot bring client up to speed.\n", tlidx);
					}

					if (sendcode == NETMSG_FAIL_SOCKET) {
						close(tstate.sockets[idx]);
						fprintf(stderr, "NETMSG_FAIL_SOCKET from socket.\n");
						remove_client(tlidx, idx, &tstate);
						idx--; 	// the following client has moved into this slot
					}
				}
			}
		}

		// receive and process messages from clients
		for (int idx= 0; idx< tstate.thread_connects; idx++) {
			if (!FD_ISSET(tstate.sockets[idx], &rfds))
				continue;

			struct netmessage msg;
			int recvcode;
			// process incoming messages; only the most recent position of a batch is kept
			float padpos= 0.5;
			short padupdate= 0;
			short removed= 0;
			while ((recvcode= netmessage_recv(tstate.sockets[idx], &msg, sizeof(msg), tstate.netrecvbuf[idx])) == NETMSG_SUCCESS) {
				if (msg.hdr.id == NETMSG_EXIT) {
					close(tstate.sockets[idx]);
					fprintf(stderr, "NETMSG_EXIT from socket.\n");
					remove_client(tlidx, idx, &tstate);
					removed= 1;
					break;
				}
				if (msg.hdr.id != NETMSG_POS) continue;

				// use values outside the valid range for RTTM purposes
				if (msg.payload.position > PONG_RANGE_SPREAD) {
					if ((tstate.flags[idx] & FLAG_ALLOW_RTTM) == 0) continue;
					netmessage_send(tstate.sockets[idx], NETMSG_POS, &msg.payload.position, sizeof(msg.payload.position), tstate.netsendbuf[idx]);
					if (rttm_min >0) tstate.flags[idx]&= ~FLAG_ALLOW_RTTM;
					continue;
				}

				// regular client message processing
				padpos= ((float)msg.payload.position) / PONG_RANGE_SPREAD;
				padupdate= 1;
			}
			// drop client on serious conditions
			if ( !removed && ((recvcode == NETMSG_FAIL_SOCKET) || (recvcode == NETMSG_FAIL_CHECKSUM) || (recvcode == NETMSG_END_SOCKET)) ) {
				if (recvcode != NETMSG_END_SOCKET) {
					char *kickmsg= "Socket fault";
					netmessage_send(tstate.sockets[idx], NETMSG_KICK, kickmsg, strlen(kickmsg) +1, tstate.netsendbuf[idx]);
					fprintf(stderr, "Socket fault (%s).\n", (recvcode == NETMSG_FAIL_SOCKET)? "NETMSG_FAIL_SOCKET":"NETMSG_FAIL_CHECKSUM");
				}
				close(tstate.sockets[idx]);
				remove_client(tlidx, idx, &tstate);
				removed= 1;
			}
			if (removed) {
				// The slot now belongs to the next client (or to nobody): positions received
				// before the client went away must not be applied to it.
				idx--;
				continue;
			}
			if (!padupdate)
				continue;

			// propagate updated position
			const short team= tstate.teams[idx];
			if (pthread_rwlock_rdlock(&gsync)) {
				fprintf(stderr, "Worker Thread [%d]: Cannot acquire read lock.\n", tlidx);
				continue; 	// neither read unprotected data nor unlock a lock that is not held
			}
			float teammean= ((float)(game.pad[team].mean)) / PONG_RANGE_SPREAD; 	// use most current values
			unsigned teampeers= game.pad_attr[team].peers;
			if (pthread_rwlock_unlock(&gsync))
				fprintf(stderr, "Worker Thread [%d]: Cannot release read lock.\n", tlidx);

			// semantics on this lock may seem strange, but make perfect sense, since write operations are deliberately designed to be non-overlapping
			if (pthread_rwlock_rdlock(&statsync)) {
				fprintf(stderr, "Worker Thread [%d]: Cannot acquire write lock.\n", tlidx);
				continue;
			}
			// publish updated position
			const int statidx= tlidx * n_teams + team;
			sums[statidx]+= padpos -tstate.positions[idx];
			float unitdiff= teammean - padpos;
			if (teampeers >0) unitdiff+= (padpos - tstate.positions[idx]) / teampeers; 	// improves accuracy somewhat
			variances[statidx]+= (unitdiff * unitdiff) -tstate.diffs[idx];
			if (pthread_rwlock_unlock(&statsync))
				fprintf(stderr, "Worker Thread [%d]: Cannot release write lock.\n", tlidx);
			tstate.positions[idx]= padpos;
			tstate.diffs[idx]= unitdiff * unitdiff;
		}
	}

	// clean up: tell every client, then release its socket and buffers
	for (int idx= 0; idx< tstate.thread_connects; idx++) {
		char *kickmsg= "Shutdown";
		netmessage_send(tstate.sockets[idx], NETMSG_KICK, kickmsg, strlen(kickmsg) +1, tstate.netsendbuf[idx]);
		close(tstate.sockets[idx]);
	}
	if (tstate.netrecvbuf)
		for (int idx= 0; idx< tstate.thread_connects; idx++)
			free(tstate.netrecvbuf[idx]);
	if (tstate.netsendbuf)
		for (int idx= 0; idx< tstate.thread_connects; idx++)
			free(tstate.netsendbuf[idx]);

	// semantics on this lock may seem strange, but make perfect sense, since write operations are non-overlapping
	const int statlocked= (pthread_rwlock_rdlock(&statsync) == 0);
	if (!statlocked)
		fprintf(stderr, "Worker Thread [%d]: Cannot acquire write lock.\n", tlidx);
	// the rows are reset even without the lock so that no stale counts are left behind
	for (int idx= 0; idx< n_teams; idx++) {
		const int statidx= tlidx * n_teams + idx;
		n_connects[statidx]= 0;
		variances[statidx]= 0;
		sums[statidx]= 0;
	}
	if (statlocked && pthread_rwlock_unlock(&statsync))
		fprintf(stderr, "Worker Thread [%d]: Cannot release write lock.\n", tlidx);

	// free remaining resources (free(NULL) is a no-op, so no guards are needed)
	for (int idx= 0; thread_state_members[idx].membsize; idx++) {
		free(INSTANCE(&tstate, thread_state_members[idx].offset));
		INSTANCE(&tstate, thread_state_members[idx].offset)= NULL;
	}
	free(tstate.team_count);
	free(tstate.peers);
	free(scoresense);

	return 0;
}



static int add_client(tlidx, newsock, newpeer, tstate)
const int tlidx;
int newsock;
in_addr_t newpeer;
struct thread_state *tstate;
{
	if (verbosity)
		printf("Worker Thread [%d]: Adding client [%d].\n", tlidx, newsock);
	for (int idx= 0; thread_state_members[idx].membsize; idx++) {
		INSTANCE(tstate, thread_state_members[idx].offset)=
			realloc(INSTANCE(tstate, thread_state_members[idx].offset),
				thread_state_members[idx].membsize * (tstate->thread_connects +1));
		assert(INSTANCE(tstate, thread_state_members[idx].offset));
	}
	if (insert_peer) {
		tstate->peers= realloc(tstate->peers, sizeof(in_addr_t) * (tstate->thread_connects +1));
		assert(tstate->peers);
	}

	tstate->sockets[tstate->thread_connects]= newsock;
	if (insert_peer) tstate->peers[tstate->thread_connects]= newpeer;
	tstate->netsendbuf[tstate->thread_connects]= tstate->netrecvbuf[tstate->thread_connects]= NULL;
	netmessage_buffer_init( tstate->netrecvbuf + tstate->thread_connects );
	netmessage_buffer_init( tstate->netsendbuf + tstate->thread_connects );
	tstate->positions[tstate->thread_connects]= .5;
	tstate->teams[tstate->thread_connects]= choose_team(n_teams, tstate->team_count);
	tstate->flags[tstate->thread_connects]= FLAG_REFRESH_STATE | FLAG_ALLOW_RTTM;

	// semantics on this lock may seem strange, but make perfect sense, since write operations are non-overlapping
	if (pthread_rwlock_rdlock(&statsync)) {
		close(newsock);
		fprintf(stderr, "Worker Thread [%d]: Cannot acquire shared write lock.\n", tlidx);
		return (-1);
	}
	else {
		unsigned teampeers= game.pad_attr[ tstate->teams[tstate->thread_connects] ].peers;
		float teammean= ( ( ((float)(game.pad[ tstate->teams[tstate->thread_connects] ].mean)) /PONG_RANGE_SPREAD ) * teampeers + tstate->positions[tstate->thread_connects] ) / (teampeers + 1); 	// improves accuracy somewhat
		float unitdiff= teammean - tstate->positions[tstate->thread_connects];
		tstate->diffs[tstate->thread_connects]= unitdiff * unitdiff;
		n_connects[ tlidx * n_teams + tstate->teams[tstate->thread_connects] ]++;
		sums[ tlidx * n_teams + tstate->teams[tstate->thread_connects] ]+= tstate->positions[tstate->thread_connects];
		variances[ tlidx * n_teams + tstate->teams[tstate->thread_connects] ]+= tstate->diffs[tstate->thread_connects];
		if (pthread_rwlock_unlock(&statsync))
			fprintf(stderr, "Worker Thread [%d]: Cannot release shared write lock.\n", tlidx);
		tstate->team_count[ tstate->teams[tstate->thread_connects] ]++;
		tstate->thread_connects++;
	}
	return 0;
}



static int remove_client(tlidx, sockidx, tstate)
const int tlidx, sockidx;
struct thread_state *tstate;
{
	if (verbosity)
		printf("Worker Thread [%d]: Removing client [%d].\n", tlidx, tstate->sockets[sockidx]);
	int retcode= 0;
	int ncidx= tlidx * n_teams + tstate->teams[sockidx];
	// semantics on this lock may seem strange, but make perfect sense, since write operations are non-overlapping
	if (pthread_rwlock_rdlock(&statsync)) {
		fprintf(stderr, "Worker Thread [%d]: Cannot acquire shared write lock.\n", tlidx);
		retcode= (-1);
	}
	n_connects[ncidx]--;
	sums[ncidx]-= tstate->positions[sockidx];
	variances[ncidx]-= tstate->diffs[sockidx];

	if (!n_connects[ncidx])
		sums[ncidx] = variances[ncidx] = 0;

	if (pthread_rwlock_unlock(&statsync)) {
		fprintf(stderr, "Worker Thread [%d]: Cannot release shared write lock.\n", tlidx);
		retcode= (-1);
	}
	tstate->thread_connects--;
	tstate->team_count[ tstate->teams[sockidx] ]--;
	if ( tstate->netrecvbuf[sockidx] ) free( tstate->netrecvbuf[sockidx] );
	if ( tstate->netsendbuf[sockidx] ) free( tstate->netsendbuf[sockidx] );

	if (insert_peer) {
		struct sockaddr_in sin;
		sin.sin_addr.s_addr= tstate->peers[sockidx];

		memmove(tstate->peers +sockidx, tstate->peers +sockidx +1, (tstate->thread_connects -sockidx) *sizeof(in_addr_t));
		tstate->peers= realloc(tstate->peers, sizeof(in_addr_t) * tstate->thread_connects);
		if (tstate->thread_connects) assert(tstate->peers);

		int val= addrtable_atomic(sin, peer_cnt_decrease, NULL);
		if ((val > 0) || ((-1) > val))
			addrtable_remove(sin);
	}

	for (int idx= 0; thread_state_members[idx].membsize; idx++) {
		memmove(INSTANCE(tstate, thread_state_members[idx].offset) +sockidx *thread_state_members[idx].membsize,
			INSTANCE(tstate, thread_state_members[idx].offset) +(sockidx +1) *thread_state_members[idx].membsize,
			(tstate->thread_connects -sockidx) *thread_state_members[idx].membsize);
		INSTANCE(tstate, thread_state_members[idx].offset)=
			realloc(INSTANCE(tstate, thread_state_members[idx].offset),
				thread_state_members[idx].membsize * tstate->thread_connects);
		if (tstate->thread_connects)
			assert(INSTANCE(tstate, thread_state_members[idx].offset));
//		else assert(!INSTANCE(tstate, thread_state_members[idx].offset)); 	-- this depends on the libc implementation specifics from all I know about POSIX
	}
	return retcode;
}



// Pick the team a new client joins.
//
// n_teams: number of teams; members: current member count of each team (n_teams entries).
// While at least one team has no more than SMALL_TEAM members, the smallest such team is
// returned (the lowest index wins ties). Otherwise a random team is picked and then
// replaced by any team that is roughly an order of magnitude (base 5) smaller.
// Returns the index of the chosen team (0 if n_teams is not positive).
static short choose_team(const short n_teams, const long *members)
{
	if (n_teams <= 0) return 0; 	// nothing to choose from; also keeps rand() % n_teams defined

	// deterministic start
	short minteam= 0;
	for (short idx= 0; idx< n_teams; idx++)
		if ((members[idx] <= SMALL_TEAM)&&(members[idx] < members[minteam])) minteam= idx;
	if (members[minteam] <= SMALL_TEAM) return minteam;

	// probabilistic behavior for larger teams (preserve order of magnitude balance)
	minteam= (short)(rand() % n_teams);
	for (short idx= 0; idx< n_teams; idx++) {
		// magn: smallest power of 5 above members[idx], capped at 5^8
		int max= 8;
		long magn= 1;
		while ((members[idx] / magn)&&(max-- > 0)) magn*= 5;
		// switch over if the current pick is at least that much bigger
		if (members[minteam] / magn) minteam= idx;
	}
	return minteam;
}



// adjust for time slip
//
// Turns *period from "current time" into the select() timeout that is left of `interval`
// (microseconds), given that the interval started at *sync. The timeout never drops
// below NOWAIT_INTERVAL microseconds.
// Returns non-zero if the resulting timeout equals NOWAIT_INTERVAL, 0 otherwise.
static inline int compensate_timer(const unsigned interval, const struct timeval *sync, struct timeval *period)
{
	// interval minus the time elapsed between *sync and *period, in microseconds
	long reltime= (long)interval
		- ( ((long)period->tv_sec) - ((long)sync->tv_sec) ) * (1000L * 1000L)
		- period->tv_usec + sync->tv_usec;
	if (reltime < NOWAIT_INTERVAL)
		reltime= NOWAIT_INTERVAL;
	period->tv_sec= reltime / (1000L * 1000L);
	period->tv_usec= reltime % (1000L * 1000L);
	return (reltime == NOWAIT_INTERVAL);
}



// Rate limiter for RTTM probes.
//
// interval: minimum spacing in milliseconds (0 disables the timer, it then never fires);
// sync: current time; last: time the timer fired last.
// Returns 1 and stores *sync in *last once at least `interval` full milliseconds have
// passed since *last; returns 0 and leaves *last untouched otherwise (including the case
// of *sync lying before *last).
static inline int rttm_timer(const unsigned interval, const struct timeval *sync, struct timeval *last)
{
	if (!interval) return 0;
	// Work on the total microsecond difference: truncating seconds and microseconds
	// separately would overestimate the elapsed time by up to 1 ms whenever the
	// microsecond part wraps around a second boundary.
	const long long elapsed_us=
		((long long)sync->tv_sec - (long long)last->tv_sec) * 1000000LL
		+ ((long long)sync->tv_usec - (long long)last->tv_usec);
	if (elapsed_us / 1000LL < (long long)interval) 	// signed comparison, no unsigned promotion
		return 0;
	*last= *sync;
	return 1;
}



// Queue/send a full game state message (NETMSG_STAT) to one client.
//
// sock / netsendbuf: the client's socket and send buffer; game: state snapshot to send;
// team: the team this client plays for (part of the message).
// Returns whatever netmessage_send() returns.
static int send_status_full(const int sock, struct netmessage_buffer *netsendbuf, const struct gameplay_public *game, const short team)
{
	struct game_state_full buf;
	memset(&buf, 0, sizeof(buf)); 	// padding and unset bytes go over the wire too: do not leak stack contents
	buf.team= team;
	memcpy(&buf.game, game, sizeof(buf.game));
	return netmessage_send(sock, NETMSG_STAT, &buf, sizeof(buf), netsendbuf);
}



// Queue/send an incremental game state message (NETMSG_UPDT) to one client.
//
// Only the stamp, ball and pad members of *game are transmitted.
// sock / netsendbuf: the client's socket and send buffer.
// Returns whatever netmessage_send() returns.
static int send_status_update(const int sock, struct netmessage_buffer *netsendbuf, const struct gameplay_public *game)
{
	struct game_state_part buf;
	memset(&buf, 0, sizeof(buf)); 	// padding and unset bytes go over the wire too: do not leak stack contents
	memcpy(&buf.stamp, &game->stamp, sizeof(buf.stamp));
	memcpy(&buf.ball, &game->ball, sizeof(buf.ball));
	memcpy(&buf.pad, &game->pad, sizeof(buf.pad));
	return netmessage_send(sock, NETMSG_UPDT, &buf, sizeof(buf), netsendbuf);
}



// Callback handed to addrtable_atomic() by remove_client(): drops one reference from the
// connection counter *cnt of a peer address (never below zero).
// Returns 1 when the counter is zero afterwards (time to remove the entry), 0 otherwise.
static int peer_cnt_decrease(unsigned *cnt)
{
	if (verbosity >2) printf("Peer count == %u (@%p).\n", *cnt, (void *)cnt); 	// %p requires a void *
	if (*cnt> 0)
		(*cnt)--;
	return (*cnt == 0); 	// time to remove entry
}

