package backpropagation; import java.util.Random; import java.io.FileReader; import java.io.BufferedReader; import java.io.File; // for UTF import java.io.RandomAccessFile; // for UTF import java.io.IOException; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.PrintWriter; import java.util.StringTokenizer; import java.util.GregorianCalendar; import java.text.SimpleDateFormat; import gnu.regexp.*; /**************************************************************** * * Implementation of the well-known backpropagation algorithm * with optional number of layers.
* @Book{Szabo01,
* author = "R. Szabó",
* title = "Mobil robotok szimulációja",
* publisher = "Eötvös Kiadó",
* year = "2001",
* }
*
* Author: Richard Szabo
*
****************************************************************/
public class Backpropagation {
/****************************************************************/
/**
* Learning rate of the network.
*/
protected double learningRate = 0.01;
/**
* Threshold of the sigmoid function.
*/
protected double theta = 0.0;
/**
* Elasticity of the sigmoid function.
*/
protected double elasticity = 1.0;
/**
* Calming rate of the learning rate: 1.0 means constant learning rate.
*/
protected double calmingRate = 1.0;
/**
* Constant determining the influence of the previous weight change
* on the actual. 0.0 means no influence.
*/
protected double momentum = 0.0;
/**
* Changing rate of the elasticity. It must be in the [0.0,1.0].
*/
protected double elasticityRate = 0.0;
/****************************************************************/
/**
* Number of the layers, must be at least 2.
*/
protected int numberOfLayers;
/**
* Number of the neurons in layers, must be at least 1.
*/
protected int numberOfNeurons[];
/****************************************************************/
/**
* Layers of the network.
*/
protected Neuron layers[][];
/****************************************************************/
/**
* Summed absolute error on the components of the output vector.
*/
public double absoluteError;
/****************************************************************
 *
 * Basic constructor: builds the network structure with the default
 * learning parameters and randomly initialized weights.
 *
 * On an invalid argument a message is printed to standard output
 * and the constructor returns early, leaving the structure fields
 * unset.
 *
 * @param numberOfLayers  number of layers, input layer included;
 *                        must be at least 2
 * @param numberOfNeurons neuron count per layer; its length must
 *                        equal numberOfLayers and every entry must
 *                        be positive. The array is copied, so later
 *                        changes by the caller do not affect the
 *                        network.
 *
 ****************************************************************/
public Backpropagation(int numberOfLayers,
                       int numberOfNeurons[]) {
    if( numberOfLayers < 2 ) {
        System.out.println("Wrong number of layers!");
        return;
    }
    if( numberOfNeurons.length != numberOfLayers ) {
        System.out.println("Wrong size of layer descriptor array!");
        return;
    }
    for( int i = 0; i < numberOfNeurons.length; ++i ) {
        if( numberOfNeurons[i] <= 0 ) {
            /* report the real problem instead of repeating the */
            /* array-size message */
            System.out.println("Wrong number of neurons in layer " + i + "!");
            return;
        }
    }
    this.numberOfLayers = numberOfLayers;
    /* defensive copy: the validated counts must not change under us */
    this.numberOfNeurons = (int[]) numberOfNeurons.clone();
    layers = new Neuron[numberOfLayers][];
    /* the input layer exists only implicitly: layers[0] stays null */
    for( int i = 1; i < numberOfLayers; ++i ) {
        layers[i] = new Neuron[this.numberOfNeurons[i]];
        for( int j = 0; j < this.numberOfNeurons[i]; ++j ) {
            /* each neuron takes as many inputs as the previous layer has neurons */
            layers[i][j] = new Neuron(this, this.numberOfNeurons[i - 1]);
        }
    }
    /* initialization */
    this.init();
}
/****************************************************************
*
* Creation of a new BackProp network with lots of parameters.
*
****************************************************************/
public Backpropagation(int numberOfLayers,
int numberOfNeurons[],
double learningRate,
double theta,
double elasticity,
double calmingRate,
double momentum,
double elasticityRate) {
/* checking the adequacy of the parameters */
this(numberOfLayers,numberOfNeurons);
if( !(learningRate > 0.0 && learningRate <= 1.0) ) {
System.out.println("Invalid learning rate!");
return;
}
if( !(calmingRate > 0.0 && calmingRate <= 1.0) ) {
System.out.println("Invalid calmingRate!");
return;
}
if( !(momentum >= 0.0 && momentum <= 1.0) ) {
System.out.println("Invalid momentum!");
return;
}
if( !(elasticityRate >= 0.0 && elasticityRate <= 1.0) ) {
System.out.println("Invalid elasticityRate!");
return;
}
/* creation of the network structure */
this.learningRate = learningRate;
this.theta = theta;
this.elasticity = elasticity;
this.calmingRate = calmingRate;
this.momentum = momentum;
this.elasticityRate = elasticityRate;
}
/****************************************************************
 *
 * Forward pass: feeds the input vector through every layer in
 * turn, each layer receiving the outputs of the previous one.
 * Output is in the (-1,1) interval.
 *
 * Nothing is returned; the results are presumably kept inside the
 * neurons (see output(int) and hidden(int,int)).
 * If the input length differs from the size of the input layer a
 * message is printed and nothing is computed.
 *
 * @param input input vector, one component per input neuron
 *
 ****************************************************************/
public void propagate(double input[]) {
    /* the vector must match the size of the input layer */
    if( numberOfNeurons[0] != input.length ) {
        System.out.println("Wrong number of input!");
        return;
    }
    double signal[] = input;
    int layer = 1;
    while( layer < numberOfLayers ) {
        final double produced[] = new double[numberOfNeurons[layer]];
        for( int n = 0; n < numberOfNeurons[layer]; ++n ) {
            produced[n] = layers[layer][n].propagate(signal);
        }
        /* this layer's outputs feed the next layer */
        signal = produced;
        ++layer;
    }
}
/****************************************************************
 *
 * Teaches the network according to the input-output value pair.
 * Output is in the (-1,1) interval.
 *
 * Runs a forward pass, then walks the layers backwards computing
 * an error term for every neuron and handing it to Neuron.learn().
 * absoluteError is set to the sum of the absolute error terms of
 * the output layer (these include the sigmoid derivative factor).
 * Finally the learning rate is multiplied by calmingRate and the
 * elasticity is moved towards 1.0 by elasticityRate.
 *
 * If either vector has the wrong dimension a message is printed
 * and the network is left untouched.
 *
 * @param input  input vector, one component per input neuron
 * @param output desired output, one component per output neuron
 *
 ****************************************************************/
public void learn(double input[],double output[]) {
    final int last = numberOfLayers - 1;
    /* checking the dimension of the output vector */
    if( numberOfNeurons[last] != output.length ) {
        System.out.println("Wrong number of output!");
        return;
    }
    /* checking the dimension of the input vector here as well: */
    /* propagate() only prints and returns on a mismatch, which */
    /* would let the backward phase train on stale neuron states */
    if( numberOfNeurons[0] != input.length ) {
        System.out.println("Wrong number of input!");
        return;
    }
    /* forward phase */
    propagate(input);
    /* backward phase - output layer */
    double error[] = new double[numberOfNeurons[last]];
    absoluteError = 0.0;
    for( int j = 0; j < numberOfNeurons[last]; ++j ) {
        /* calculation of the error term of this neuron */
        error[j] = (output[j] - layers[last][j].output) *
                   d1sigmoid(layers[last][j].state);
        /* learning with the error term */
        layers[last][j].learn(error[j]);
        /* calculation of network error */
        absoluteError += Math.abs(error[j]);
    }
    /* backward phase - hidden layers */
    for( int i = numberOfLayers - 2; i > 0 ; --i ) {
        double prevError[] = error;
        error = new double[numberOfNeurons[i]];
        for( int j = 0; j < numberOfNeurons[i]; ++j ) {
            /* calculation of the error term of this neuron */
            /* using the next layer's error */
            /* NOTE(review): layers[i + 1][k].learn() has already run; */
            /* if Neuron.learn() changes the weights immediately, this */
            /* sum uses the updated weights, not those of the forward */
            /* pass - confirm that this is intended */
            for( int k = 0; k < numberOfNeurons[i + 1]; ++k ) {
                error[j] += prevError[k] * layers[i + 1][k].weights[j];
            }
            error[j] *= d1sigmoid(layers[i][j].state);
            /* learning with the error term */
            layers[i][j].learn(error[j]);
        }
    }
    /* diminishing the learning rate */
    learningRate *= calmingRate;
    /* moving the elasticity towards 1.0 */
    elasticity += (1.0 - elasticity) * elasticityRate;
}
/****************************************************************
 *
 * Alteration of the network: asks every neuron of every
 * non-input layer to alter itself, all of them sharing one freshly
 * created random generator.
 * The kind of perturbation (Gaussian, according to the original
 * description) is decided in Neuron.alter().
 *
 ****************************************************************/
public void alterWeights(){
    final Random generator = new Random();
    int layer = 1;
    while( layer < numberOfLayers ) {
        int n = 0;
        while( n < numberOfNeurons[layer] ) {
            layers[layer][n].alter(generator);
            ++n;
        }
        ++layer;
    }
}
/**
 * Structure file constants: the header texts written by
 * saveStructure() and the keywords that precede each setting
 * (each keyword is followed by a colon in the file).
 */
static final String nameStr = "Backpropagation network structure description";
static final String authorStr = "@author: Richard Szabo";
static final String versionStr = "Version: 0.1";
static final String dateStr = "Creation: ";
static final String layersStr = "layers";
static final String neuronsStr = "neurons";
static final String learningRateStr = "learning rate";
static final String thetaStr = "theta";
static final String elasticityStr = "elasticity";
static final String calmingRateStr = "calming rate";
static final String momentumStr = "momentum";
static final String elasticityRateStr = "elasticity rate";
/****************************************************************
 *
 * Saves the network structure to textfile.
 *
 * The file starts with four comment lines (name, author, version,
 * creation date) and an empty line, followed by one "keyword: value"
 * line for the layer count, the neuron counts and each of the six
 * learning parameters. Weights are not written.
 *
 * @param filename name of the file to create or overwrite
 * @throws IOException if the file cannot be opened or a write
 *                     error occurred
 *
 ****************************************************************/
public void saveStructure(String filename) throws IOException {
    // creating the structure file
    PrintWriter pw = new PrintWriter(new FileWriter(filename));
    try {
        // Format the current time.
        GregorianCalendar gc = new GregorianCalendar();
        SimpleDateFormat formatter = new SimpleDateFormat ("yyyy-MMM-dd");
        String formattedStr = formatter.format(gc.getTime());
        // writing the header information
        pw.println("/* " + nameStr + " */");
        pw.println("/* " + authorStr + " */");
        pw.println("/* " + versionStr + " */");
        pw.println("/* " + dateStr + formattedStr + " */");
        // empty line
        pw.println("");
        // writing the layer number
        pw.println(layersStr + ": " + numberOfLayers);
        // writing the neuron number by layer
        pw.print(neuronsStr + ":");
        for( int i = 0; i < numberOfLayers; ++i ) {
            pw.print(" " + numberOfNeurons[i]);
        }
        pw.println("");
        // writing the learning parameters
        pw.println(learningRateStr + ": " + learningRate);
        pw.println(thetaStr + ": " + theta);
        pw.println(elasticityStr + ": " + elasticity);
        pw.println(calmingRateStr + ": " + calmingRate);
        pw.println(momentumStr + ": " + momentum);
        pw.println(elasticityRateStr + ": " + elasticityRate);
        // PrintWriter never throws on write errors: checkError()
        // flushes and tells whether anything went wrong
        if( pw.checkError() ) {
            throw new IOException("Error writing structure file: " + filename);
        }
    } finally {
        // closing the written file even if something failed
        pw.close();
    }
}
/****************************************************************
 *
 * Loads the network structure from textfile.
 *
 * Every line is matched against the "keyword: value" patterns of
 * the structure file; lines matching none of them are ignored.
 * The layers setting must precede the neurons setting and both
 * must be present. Learning parameters missing from the file keep
 * their default values. The weights of the returned network are
 * randomly initialized.
 *
 * @param filename name of the structure file
 * @return the newly built network
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException on a read error, an unparsable or invalid
 *                     value, or a missing layers/neurons setting
 *
 ****************************************************************/
public static Backpropagation loadStructure(String filename) throws FileNotFoundException, IOException {
    // regular expressions to search for
    RE layersRe = null;
    RE neuronsRe = null;
    RE learningRateRe = null;
    RE thetaRe = null;
    RE elasticityRe = null;
    RE calmingRateRe = null;
    RE momentumRe = null;
    RE elasticityRateRe = null;
    try {
        layersRe = new RE(layersStr + ": (.+)");
        neuronsRe = new RE(neuronsStr + ":(( .+)+)");
        learningRateRe = new RE(learningRateStr + ": (.+)");
        thetaRe = new RE(thetaStr + ": (.+)");
        elasticityRe = new RE(elasticityStr + ": (.+)");
        calmingRateRe = new RE(calmingRateStr + ": (.+)");
        momentumRe = new RE(momentumStr + ": (.+)");
        elasticityRateRe = new RE(elasticityRateStr + ": (.+)");
    } catch(REException ree) {
        throw new IOException("Invalid regexp:" + ree);
    }
    // creation of an empty network
    Backpropagation bpn = new Backpropagation();
    boolean layersSet = false;
    boolean neuronsSet = false;
    // opening the network structure file
    BufferedReader br = new BufferedReader(new FileReader(filename));
    try {
        String line;
        // reading the file line by line
        while( (line = br.readLine()) != null ) {
            // reading the layer number
            REMatch rem = layersRe.getMatch(line);
            if( rem != null ) {
                int count = parseIntSetting(rem.toString(1), layersStr);
                if( count < 2 ) {
                    throw new IOException("Invalid " + layersStr + ":" + count);
                }
                bpn.numberOfLayers = count;
                bpn.layers = new Neuron[count][];
                layersSet = true;
                // a new layer count invalidates earlier neuron counts
                neuronsSet = false;
            }
            // reading the neuron number by layer
            rem = neuronsRe.getMatch(line);
            if( rem != null ) {
                if( !layersSet ) {
                    // layers setting must precede neurons
                    throw new IOException("Layers setting must precede neurons.");
                }
                StringTokenizer st = new StringTokenizer(rem.toString(1));
                if( bpn.numberOfLayers != st.countTokens() ) {
                    throw new IOException("Invalid number of layers: " +
                                          bpn.numberOfLayers + " " +
                                          st.countTokens());
                }
                bpn.numberOfNeurons = new int[bpn.numberOfLayers];
                for( int i = 0; i < bpn.numberOfLayers; ++i ) {
                    int neurons = parseIntSetting(st.nextToken(), neuronsStr);
                    if( neurons <= 0 ) {
                        throw new IOException("Invalid " + neuronsStr + ":" + neurons);
                    }
                    bpn.numberOfNeurons[i] = neurons;
                    /* the input layer exists only implicitly */
                    if( i > 0 ) {
                        bpn.layers[i] = new Neuron[neurons];
                        for( int j = 0; j < neurons; ++j ) {
                            bpn.layers[i][j] = new Neuron(bpn,bpn.numberOfNeurons[i - 1]);
                        }
                    }
                }
                neuronsSet = true;
            }
            // reading the learning parameters; a setting that is
            // absent from this line keeps its current value
            bpn.learningRate = parseDoubleSetting(learningRateRe, line, learningRateStr, bpn.learningRate);
            bpn.theta = parseDoubleSetting(thetaRe, line, thetaStr, bpn.theta);
            bpn.elasticity = parseDoubleSetting(elasticityRe, line, elasticityStr, bpn.elasticity);
            bpn.calmingRate = parseDoubleSetting(calmingRateRe, line, calmingRateStr, bpn.calmingRate);
            bpn.momentum = parseDoubleSetting(momentumRe, line, momentumStr, bpn.momentum);
            bpn.elasticityRate = parseDoubleSetting(elasticityRateRe, line, elasticityRateStr, bpn.elasticityRate);
        }
    } finally {
        // the reader must be released on success and on failure alike
        br.close();
    }
    if( !layersSet || !neuronsSet ) {
        throw new IOException("Missing " + layersStr + " or " + neuronsStr + " setting.");
    }
    // init network weights
    bpn.init();
    return bpn;
}

/* Parses an integer setting, reporting a malformed value as IOException. */
private static int parseIntSetting(String text, String label) throws IOException {
    try {
        // Integer.parseInt does not tolerate surrounding whitespace
        return Integer.parseInt(text.trim());
    } catch(NumberFormatException nfe) {
        throw new IOException("Invalid " + label + ":" + text);
    }
}

/* Returns the double captured by pattern in line, or current when the line does not match. */
private static double parseDoubleSetting(RE pattern, String line, String label, double current) throws IOException {
    REMatch match = pattern.getMatch(line);
    if( match == null ) {
        return current;
    }
    String text = match.toString(1);
    try {
        return Double.parseDouble(text);
    } catch(NumberFormatException nfe) {
        throw new IOException("Invalid " + label + ":" + text);
    }
}
/****************************************************************
 *
 * Saves the learned network weights in UTF format.
 *
 * The file holds the type tag "BPNN", the layer count, the neuron
 * count of every layer, the six learning parameters and then the
 * data each neuron writes through Neuron.writeUTF().
 * Failures are reported on standard output only; nothing is thrown
 * for I/O, argument or security errors.
 *
 * @param path directory of the file
 * @param name name of the file inside that directory
 *
 ****************************************************************/
public void saveNeuro(String path, String name){
    try{
        File rawfile = new File(path, name);
        RandomAccessFile file = new RandomAccessFile(rawfile,"rw");
        try{
            /* "rw" does not truncate: drop any previous content so */
            /* that no stale bytes survive behind the new data */
            file.setLength(0);
            /* writing the file type */
            file.writeUTF("BPNN");
            /* saving the network parameters */
            file.writeInt(numberOfLayers);
            for( int i = 0; i < numberOfLayers; ++i ) {
                file.writeInt(numberOfNeurons[i]);
            }
            file.writeDouble(learningRate);
            file.writeDouble(theta);
            file.writeDouble(elasticity);
            file.writeDouble(calmingRate);
            file.writeDouble(momentum);
            file.writeDouble(elasticityRate);
            /* saving the network weights */
            for( int i = 1; i < numberOfLayers; ++i ) {
                for( int j = 0; j < numberOfNeurons[i]; ++j ) {
                    layers[i][j].writeUTF(file);
                }
            }
        } finally {
            /* release the file handle even when a write failed */
            file.close();
        }
    } catch(IllegalArgumentException iae){
        System.out.println("Argument error:" + iae);
    } catch(IOException ioe){
        System.out.println("File write error:" + ioe);
    } catch(SecurityException se) {
        System.out.println("Security exception:" + se);
    }
}
/****************************************************************
 *
 * Loads a previously learned network from UTF format.
 *
 * Expects the layout written by saveNeuro(): the type tag "BPNN",
 * the layer count, the neuron count of every layer, the six
 * learning parameters and the per-neuron data read back through
 * Neuron.readUTF().
 *
 * @param path directory of the file
 * @param name name of the file inside that directory
 * @return the loaded network, or null if an argument is null, the
 *         file is not a valid network description, or an I/O,
 *         argument or security error occurred (the latter are
 *         reported on standard output)
 *
 ****************************************************************/
public static Backpropagation loadNeuro(String path, String name){
    if(name == null || path == null) return null;
    try{
        File rawfile = new File(path, name);
        RandomAccessFile file = new RandomAccessFile(rawfile,"r");
        try{
            /* checking the file type */
            if(file.readUTF().compareTo("BPNN") != 0){
                System.out.println("Bad network description file!");
                return null;
            }
            /* loading the network structure parameters and */
            /* creating the network structure */
            Backpropagation bpn = new Backpropagation();
            bpn.numberOfLayers = file.readInt();
            /* a corrupt count must not end in an array-size exception */
            if( bpn.numberOfLayers < 2 ) {
                System.out.println("Bad network description file!");
                return null;
            }
            bpn.numberOfNeurons = new int[bpn.numberOfLayers];
            for( int i = 0; i < bpn.numberOfLayers; ++i ) {
                bpn.numberOfNeurons[i] = file.readInt();
                if( bpn.numberOfNeurons[i] <= 0 ) {
                    System.out.println("Bad network description file!");
                    return null;
                }
            }
            bpn.layers = new Neuron[bpn.numberOfLayers][];
            /* the input layer exists only implicitly */
            for( int i = 1; i < bpn.numberOfLayers; ++i ) {
                bpn.layers[i] = new Neuron[bpn.numberOfNeurons[i]];
                for( int j = 0; j < bpn.numberOfNeurons[i]; ++j) {
                    bpn.layers[i][j] = new Neuron(bpn,
                                                  bpn.numberOfNeurons[i - 1]);
                }
            }
            /* loading the network parameters */
            bpn.learningRate = file.readDouble();
            bpn.theta = file.readDouble();
            bpn.elasticity = file.readDouble();
            bpn.calmingRate = file.readDouble();
            bpn.momentum = file.readDouble();
            bpn.elasticityRate = file.readDouble();
            /* loading the network weights */
            for( int i = 1; i < bpn.numberOfLayers; ++i ) {
                for( int j = 0; j < bpn.numberOfNeurons[i]; ++j ) {
                    bpn.layers[i][j].readUTF(file);
                }
            }
            return bpn;
        } finally {
            /* release the file handle on every exit path */
            file.close();
        }
    } catch(IllegalArgumentException iae){
        System.out.println("Argument error:" + iae);
    } catch(IOException ioe){
        System.out.println("File read error:" + ioe);
    } catch(SecurityException se) {
        System.out.println("Security exception:" + se);
    }
    return null;
}
/****************************************************************
 *
 * String representation of the network: the six learning
 * parameters, one per line, followed by the weights of every
 * neuron of every non-input layer.
 *
 * For each neuron, one more weight than the previous layer has
 * neurons is printed (presumably the extra one is a bias or
 * threshold weight kept by Neuron - confirm there).
 *
 * @return the textual dump of the network
 *
 ****************************************************************/
public String toString() {
    /* a buffer avoids re-copying the whole text on every append */
    StringBuffer s = new StringBuffer();
    s.append("learningRate=").append(learningRate).append("\n");
    s.append("theta=").append(theta).append("\n");
    s.append("elasticity=").append(elasticity).append("\n");
    s.append("calmingRate=").append(calmingRate).append("\n");
    s.append("momentum=").append(momentum).append("\n");
    s.append("elasticityRate=").append(elasticityRate).append("\n");
    for( int i = 1; i < numberOfLayers; ++i ) {
        s.append("layer[").append(i).append("]:\n");
        for( int j = 0; j < numberOfNeurons[i]; ++j ) {
            s.append("neuron[").append(j).append("]:\n");
            for( int k = 0; k < numberOfNeurons[i - 1] + 1; ++k ) {
                s.append(layers[i][j].weights[k]).append(", ");
            }
            s.append("\n");
        }
        s.append("\n");
    }
    return s.toString();
}
/****************************************************************
 *
 * Returns the stored output value of one neuron of the last
 * (output) layer; presumably the value produced by the most recent
 * forward pass.
 *
 * @param i index of the neuron inside the output layer
 * @return the output field of that neuron
 *
 ****************************************************************/
public double output(int i){
    final Neuron outputLayer[] = layers[numberOfLayers - 1];
    return outputLayer[i].output;
}
/****************************************************************
 *
 * Returns the output value of the jth neuron in the ith layer.
 * This function is necessary for the example of Mitchell.
 *
 * @param i index of the layer; layer 0 (the input layer) holds no
 *          neurons, so i should be at least 1
 * @param j index of the neuron inside that layer
 * @return the output field of the selected neuron
 *
 ****************************************************************/
public double hidden(int i, int j){
    final Neuron selected = layers[i][j];
    return selected.output;
}
/****************************************************************
 *
 * Returns the actual learning rate.
 *
 * @return the current value of the learning rate
 *
 ****************************************************************/
public double getLearningRate(){
    return this.learningRate;
}
/****************************************************************
 *
 * Sets the learning rate. The value is stored as given, without
 * any range check.
 *
 * @param learningRate the new learning rate
 *
 ****************************************************************/
public void setLearningRate(final double learningRate){
    this.learningRate = learningRate;
}
/****************************************************************
 *
 * Returns the theta.
 *
 * @return the current threshold of the sigmoid function
 *
 ****************************************************************/
public double getTheta(){
    return this.theta;
}
/****************************************************************
 *
 * Sets the theta. The value is stored as given.
 *
 * @param theta the new threshold of the sigmoid function
 *
 ****************************************************************/
public void setTheta(final double theta){
    this.theta = theta;
}
/****************************************************************
 *
 * Returns the actual elasticity.
 *
 * @return the current elasticity of the sigmoid function
 *
 ****************************************************************/
public double getElasticity(){
    return this.elasticity;
}
/****************************************************************
 *
 * Sets the elasticity. The value is stored as given.
 *
 * @param elasticity the new elasticity of the sigmoid function
 *
 ****************************************************************/
public void setElasticity(final double elasticity){
    this.elasticity = elasticity;
}
/****************************************************************
 *
 * Returns the calming rate.
 *
 * @return the factor applied to the learning rate after each
 *         learning step
 *
 ****************************************************************/
public double getCalmingRate(){
    return this.calmingRate;
}
/****************************************************************
 *
 * Sets the calming rate. The value is stored as given, without
 * any range check.
 *
 * @param calmingRate the new calming rate
 *
 ****************************************************************/
public void setCalmingRate(final double calmingRate){
    this.calmingRate = calmingRate;
}
/****************************************************************
 *
 * Returns the momentum.
 *
 * @return the current momentum constant
 *
 ****************************************************************/
public double getMomentum(){
    return this.momentum;
}
/****************************************************************
 *
 * Sets the momentum. The value is stored as given, without any
 * range check.
 *
 * @param momentum the new momentum constant
 *
 ****************************************************************/
public void setMomentum(final double momentum){
    this.momentum = momentum;
}
/****************************************************************
 *
 * Returns the elasticity rate.
 *
 * @return the current changing rate of the elasticity
 *
 ****************************************************************/
public double getElasticityRate(){
    return this.elasticityRate;
}
/****************************************************************
 *
 * Sets the elasticity rate. The value is stored as given, without
 * any range check.
 *
 * @param elasticityRate the new changing rate of the elasticity
 *
 ****************************************************************/
public void setElasticityRate(final double elasticityRate){
    this.elasticityRate = elasticityRate;
}
/****************************************************************
 *
 * Hidden constructor of the class, used by the static loader
 * methods loadNeuro and loadStructure. It creates an empty network:
 * the learning parameters have their defaults and no structure is
 * built, so the caller has to fill in the layers itself.
 *
 ****************************************************************/
protected Backpropagation() {
    super();
}
/****************************************************************
 *
 * Sigmoid function working in the (-1,1) interval:
 * 2 / (1 + exp(-elasticity * x + theta)) - 1.
 *
 * Special cases: +infinity gives 1.0 and -infinity gives -1.0;
 * a NaN argument is treated as if the elasticity * x term were 0;
 * a NaN exponential is replaced by 1.0; an exponential that
 * overflows to infinity gives -1.0.
 *
 * @param x the state (weighted input sum) of a neuron
 * @return the activation value
 *
 ****************************************************************/
protected double sigmoid(double x){
    if( x == Double.POSITIVE_INFINITY ) {
        return 1.0;
    }
    if( x == Double.NEGATIVE_INFINITY ) {
        return -1.0;
    }
    double r;
    /* NaN never compares equal to anything, itself included, so */
    /* it has to be detected with Double.isNaN */
    if( Double.isNaN(x) ) {
        r = Math.exp(theta);
    } else {
        r = Math.exp(-1.0 * elasticity * x + theta);
        if( Double.isNaN(r) ) {
            /* undefined exponent: fall back to exp(0) */
            r = 1.0;
        } else if( r == Double.POSITIVE_INFINITY ) {
            /* x was a large negative number */
            return -1.0;
        }
    }
    return 2.0 / (1.0 + r) - 1;
}
/****************************************************************
 *
 * First derivative of the sigmoid function:
 * 2 * elasticity * r / (r + 1)^2 with
 * r = exp(-elasticity * x + theta).
 *
 * Special cases: the derivative is 0.0 at both infinities and
 * whenever the exponential overflows to infinity; a NaN argument
 * is treated as if the elasticity * x term were 0; a NaN
 * exponential is replaced by 1.0.
 *
 * @param x the state (weighted input sum) of a neuron
 * @return the slope of the sigmoid at x
 *
 ****************************************************************/
protected double d1sigmoid(double x){
    /* the sigmoid is flat at both ends; -infinity used to return */
    /* 0.5 * elasticity, which is the slope at 0, not at the tail */
    if( x == Double.POSITIVE_INFINITY || x == Double.NEGATIVE_INFINITY ) {
        return 0.0;
    }
    double r;
    /* NaN never compares equal to anything, itself included, so */
    /* it has to be detected with Double.isNaN */
    if( Double.isNaN(x) ) {
        r = Math.exp(theta);
    } else {
        r = Math.exp(-1.0 * elasticity * x + theta);
        if( Double.isNaN(r) ) {
            /* undefined exponent: fall back to exp(0) */
            r = 1.0;
        } else if( r == Double.POSITIVE_INFINITY ) {
            /* x was a large negative number: avoid infinity / infinity */
            return 0.0;
        }
    }
    return (2.0 * elasticity * r) /((r + 1) * (r + 1));
}
/****************************************************************
 *
 * Initialization of the network: every neuron of every non-input
 * layer initializes itself from one shared, freshly created random
 * generator (random weights, according to the original description;
 * the details are in Neuron.init()).
 *
 ****************************************************************/
protected void init(){
    final Random generator = new Random();
    int layer = 1;
    while( layer < numberOfLayers ) {
        int n = 0;
        while( n < numberOfNeurons[layer] ) {
            layers[layer][n].init(generator);
            ++n;
        }
        ++layer;
    }
}
}