Based on the hex board evaluation ideas I gave earlier, I've written a hex playing program. I found giving it a two-ply look-ahead got rid of the tendency to play new moves right next to old moves.
This program plays position surprisingly well, but still has some very obvious weaknesses. For example, it doesn't realize that bridges are fully connected.
Clearly some sort of hexy/six-style feedback from tactical calculations into positional evaluation is necessary to make this program play better. I think my approach is quite a bit more inherently flexible about ways of doing that than the circuit resistance approach which hexy and six use.
To use this code, create a hex board of the size you want, then use x() and o() to place pieces, and to have the computer place a piece use moveo(). The computer only plays O, which is trying to connect the top and bottom. Runs of length 1000 work reasonably well.
Note that although a two-ply look-ahead is substantially better than a one-ply look-ahead, a chess-style three-ply look-ahead would be of dubious value. A better approach would be to make the results of the move/counter-move pairs influence which positions are generated in the fill-in process.
from random import randrange
from copy import deepcopy
def hexprint(myarray):
for i in xrange(len(myarray)):
print (' ' * (len(myarray) - 1 - i)) + ' '.join([str(myarray[x][i]) for x in xrange(len(myarray))])
print
class hexplayer:
def __init__(self, size):
self.size = size
self.states = [['.' for i in xrange(self.size)] for j in xrange(self.size)]
def o(self, x, y):
assert self.states[x-1][y-1] == '.'
self.states[x-1][y-1] = 'O'
hexprint(self.states)
def x(self, x, y):
assert self.states[x-1][y-1] == '.'
self.states[x-1][y-1] = 'X'
hexprint(self.states)
def findbest(self, values):
best = -1
bestx = None
besty = None
for i in xrange(self.size):
for j in xrange(self.size):
if values[i][j] > best and (self.states[i][j] == '.'):
best = values[i][j]
bestx = i
besty = j
return (bestx, besty)
def moveo(self, runs):
values2 = [[-1 for i in xrange(self.size)] for j in xrange(self.size)]
responses = [[None for i in xrange(self.size)] for j in xrange(self.size)]
for i in xrange(self.size):
for j in xrange(self.size):
if self.states[i][j] == '.':
self.states[i][j] = 'O'
values, wonx = self.evaluate(runs)
bestx, besty = self.findbest(values)
assert self.states[bestx][besty] == '.'
responses[i][j] = (bestx, besty)
values2[i][j] = runs - wonx - values[bestx][besty]
self.states[i][j] = '.'
bestx, besty = self.findbest(values2)
self.states[bestx][besty] = 'O'
hexprint(values2)
print float(values2[bestx][besty]) / runs
r1, r2 = responses[bestx][besty]
print r1+1, r2+1
hexprint(self.states)
def evaluate(self, runs):
wonx = 0
top = self.size - 1
values = [[0 for i in xrange(self.size)] for j in xrange(self.size)]
for run in xrange(runs):
myvalues = deepcopy(self.states)
for i in xrange(self.size):
for j in xrange(self.size):
if myvalues[i][j] == '.':
myvalues[i][j] = ('X', 'O')[randrange(2)]
visited = [[0 for i in xrange(self.size)] for j in xrange(self.size)]
def neigbors(x, y):
r = []
if y > 0:
r.append((x, y-1))
if x > 0:
r.append((x-1,y-1))
if x > 0:
r.append((x-1, y))
if x < top:
r.append((x+1, y))
if y < top:
r.append((x, y+1))
if x < top:
r.append((x+1, y+1))
return r
remaining = [(0, i) for i in xrange(self.size)]
while remaining:
x, y = remaining.pop()
if visited[x][y] != 0:
continue
if myvalues[x][y] != 'X':
continue
if x == top:
wonx += 1
break
visited[x][y] = 1
remaining.extend(neigbors(x, y))
else:
remaining = [(top, i) for i in xrange(self.size)]
while remaining:
x, y = remaining.pop()
if visited[x][y] != 0:
continue
if myvalues[x][y] != 'X':
continue
visited[x][y] = 2
remaining.extend(neigbors(x, y))
for i in xrange(self.size):
for j in xrange(self.size):
m = neigbors(i, j)
n = [visited[x][y] for (x, y) in m]
if (1 in n or i == 0) and (2 in n or i == top) and self.states[i][j] == '.':
values[i][j] += 1
return values, wonx
