/* FITTING ROUTINE -- WITH MODIFIED DATA TARGETS AND 4 INPUT PARAMETERS This is a shell program which calls 'amebsa' from Numerical Recipes, a downhill simplex method with simulated annealing. This shell program is tailored to the TB individual-based model, where 'funx' and 'gof' were written by Adrienne Keen and somewhat specific to the TB model. The rest of the code was adapted from code for a testing program for amebsa fitting, 'atest' written by Clarence Lehman in 2004, which in turn was based on the Numerical Recipes code for amebsa. Note: ptex is a batch file to convert these into printed page), calls tex and then calls things to make dvi out of it. Update: code modified to include parallel processing of separate calls to TB program. This is done by integrating the MPI modules in mpi.c, written by Clarence, based on his ideas and commands given to us by those at MSI. Update2: This version reads in modified data targets and accepts only four variable parameters, three disease risks and 'df'. Example new program call: 1 2 3 4 tb32i df=3 d1uk20=0.15 d2uk20=0.0004 d3uk20=0.09 */ #include #include #include #include #include #include "nrutil.h" #include "mpi.h" #include "fileio.h" #define X //Temporary marker on lines recently added or changed. #define ITER 25 //Number of iterations at a given temperature. #define W .8 //Displacement in the starting simplex. #define NPROC 500 //Maximum number of processors. #define RED .7 //Temperature reduction factor. #define THRESH .01 //Convergence threshold for parameter convergence. //Define statements specific to TB model #define AC 4 //Number of age classes for notification rates. #define SEX 2 //Number of sexes. #define ROB 3 //Number of regions of birth in the model. #define YR 11 //Number of years of model output for fitting #define IND (AC*SEX*ROB*YR) //to notification data (1999-2009). #define NOTIF 0 //Target data version, 0=rates, 1=numbers. #define AR (r==2? 1: a) //Index forcing age class 1 for SSA. X //Function prototypes double converge(int,int,double[]); //Test for convergence of parameters. float funx(float *); //Call TB model and evaluate fit of //output to calibration targets. float gof(float[ROB][YR][SEX][AC]); //Evaluate discrepancy between model //output and data (targets). int mpOpen(); //Open parallel processing. int mpClose(); //Close parallel processing. int mpCommon(double[][],double[],int); //Gather output from multiple processors. void simo(float[ROB][YR][SEX][AC],double[NPROC][ROB][YR][SEX][AC],double[ROB][YR][SEX][AC],double[ROB][YR][SEX][AC]);//Write simulation output to tex file. int Error(double); int Error1(double,char*,double); int Error2(double,char*,double,char*,double); int Error3(double,char*,double,char*,double,char*,double); #define forROB for(r=0; rNPROC) exit(3); //Check for appropriate number //of processors. char fitout[100]; //Create string for output file and sprintf(fitout,"fitR56-%02d.txt",cproc); //open file. fout=fopen(fitout,"w"); if(fout==NULL) { printf("Error opening output file!"); exit(3); } if(cproc==0) { plot = fopen("plotR56.tex","w"); //Open file for simulation output if(plot==NULL) //and print headers. { printf("Error opening output file!"); exit(3); } for(i=0; macro[i]; i++) //Opening all graphs. fprintf(plot, "\\input %s.tex\n", macro[i]); fprintf(plot, "\n"); } fprintf(fout,"Starting fit5, cproc=%d and nproc=%d\n",cproc,nproc); fflush(fout); fprintf(fout,"temptr = %f iter = %d and temptr red factor = %f\n",temptr,iter,RED); fflush(fout); if(NOTIF) //Read in data targets, where { if(ROB==3) //appropriate file depends on FileIO("targc1n.txt",fmt2[0],"r|"); //model version (SSA/nonSSA) and else //whether fitting to case numbers FileIO("targc0n.txt",fmt2[0],"r|"); } //or notification rates (NOTIF=1 else //or NOTIF=0). { if(ROB==3) FileIO("targc1.txt",fmt2[0],"r|"); else FileIO("targc0.txt",fmt2[0],"r|"); } char tout[100]; //For testing: Write out targets. sprintf(tout,"tout%02d.txt",cproc); FileIO(tout,fmt2[0],"w|"); if(argc>1) temptr = atof(argv[1]); //Retrieve starting temperature. p = matrix((long)1, (long)ndim+1, //Allocate and initialize (long)1, (long)ndim); //the starting simplex. p[1][1]=1.841155; p[1][2]=0.163031; p[1][3]=0.001048; p[1][4]=0.0755; p[2][1]=0.560994; p[2][2]=0.106223; p[2][3]=0.00038; p[2][4]=0.169378; p[3][1]=1.297929; p[3][2]=0.07736; p[3][3]=0.000933; p[3][4]=0.089043; p[4][1]=2.534156; p[4][2]=0.099439; p[4][3]=0.001034; p[4][4]=0.027495; p[5][1]=1.822337; p[5][2]=0.094479; p[5][3]=0.000948; p[5][4]=0.086628; pb = vector((long)1, (long)ndim+1); //- fprintf(fout,"About to get funx value at initial simplex points\n"); fflush(fout); y = vector((long)1, (long)ndim+1); //Allocate and initialize the for(i=1; i<=ndim+1; i++) //objective function values at y[i] = funx(p[i]); //each vertex of the initial simplex. //- fprintf(fout,"About to start sim annealing loop \n"); fflush(fout); for(yb=10000000; 1; temptr*=RED) //Run through the annealing. { iter = ITER; amebsa(p, y, ndim, pb, &yb, ftol, funx, &iter, temptr); fprintf(fout,"! %% temptr=%f idum=%ld iter=%d yb=%f pb=%f,%f,%f,%f\n\n", temptr, idum, iter, yb, pb[1],pb[2],pb[3],pb[4]); fflush(fout); if(iter>0) break; } fclose(fout); //Close annealing output file. if(cproc==0) //If this is first processor, fclose(plot); //close simulation output file. mpClose(); //Close parallel processing. exit(0); //Return to operating system. } /*----------------------------------------------------------------------------- GOODNESS OF FIT This function evaluates and returns the goodness-of-fit (GOF), using the sum of least squares, or other GOF statistic, of two arrays. Originally, the sum of least squares statistic was used for simplicity, though this is currently not implemented. Instead, a related method is used, but for any given age/sex/rob category, the target value (number of cases or notification rate) and simulation output value are both divided by the target value. This is done because the notification rates (and case numbers) are essentially on different scales for different categories. For example, SSA notification rates are around 250 per 100,000 each year, for the age category used for fitting. In UK-born, for the same age category, notification rates would be on the order of 6 per 100,000 each year. In addition, an R-squared option can be chosen or a log-likelihood deviance. ENTRY: 'si' contains values from simulation output. 'tar2' contains the calibration targets or observed data. 'STAT' contains the type of GOF statistic to be used. 0 Sum of squared deviations, relative to the observed value. 1 Log likelihood deviance function. 2 R-squared, fraction of variance explained among categories. X 3 Normalized squared deviation from the mean. X 4 Normalized absolute deviation from the median (not implemented). EXIT: 'gof' returns the goodness-of-fit of 'si' to 'tar2'. */ #define STAT 2 #define INF 1E100 float gof(float si[ROB][YR][SEX][AC]) { float x,x1,x2,rs,rn; int r,y,s,a; X // --- SQUARED DEVIATION FITTING //- fprintf(fout,"About to calculate GOF\n"); fflush(fout); if(STAT==0) //Get discrepancy between model and data { x=0; //using sum of least squares method.** for(r=0; r<2; r++) //First add discrepancy for for non-UK and for(y=0; y maxi[r][y][s][a]) maxi[r][y][s][a] = glob2[n][r][y][s][a]; } x=gof(sim2); //Obtain GOF and check GOF is the local2[0]=x; local2[1]=0; //same for all processors. mpCommon(global2,local2,2); for(i=1; i=0 && w125?50: max>50?25: 10) //Vertical resolution on graph size. #define HS 4.0 //Horizontal graph size, inches. #define HU 10.0 //Horizontal graph units. #define Phi 1.618 //Horizontal to vertical graph ratio. #define CBL "{" //Left curly brace, for balancing. #define CBR "}" //Right curly brace, for balancing. static int figure = 0; //Running figure number. static char *wsex[] = { "Male", "Female" }; static char *wage[] = { "black ", "blue ", "green", "red " }; static char *wrob[] = { "Non-UK born", "UK-born", "SSA-born" }; void simo(float sim[ROB][YR][SEX][AC], double glob2[NPROC][ROB][YR][SEX][AC],double mini[ROB][YR][SEX][AC],double maxi[ROB][YR][SEX][AC]) { int i,n,k,r,y,s,a; double max; //- fprintf(fout,"Entering 'simo'\n"); fflush(fout); fprintf(plot,"\\overfullrule=0pt\n"); for(r=ROB-1; r>=0; r--) //Determine the vertical scale for the for(s=0; s<=1; s++) //current graph (auto-scaling). { max = -1E9; for(a=0; a\n", HS/HU, (HS/Phi)/k); fprintf(plot,"\\setplotarea x from 0 to 10, y from 0 to %d \n", k); fprintf(plot,"\\plotheading {%s %s Notification Rates}\n", wrob[r], wsex[s]); fprintf(plot,"\\axis bottom %%label {Year}\n"); fprintf(plot," ticks withvalues {1999} {2001} {2003} {2005} {2007} {2009} /\n"); fprintf(plot," at 0 2 4 6 8 10 / /\n"); fprintf(plot,"\\axis left label {\\vertical{\\lines{Cases per 100,000\\cr\\cr}}}\n"); fprintf(plot," ticks numbered from 0 to %d by %d /\n", k, k<=10?2: k<=30?5: k<=150?25: 50); fprintf(plot,"\\axis right / \\axis top /\n"); for(a=0; a\n"); printf("\\setplotarea x from -6 to 6, y from -6 to 6\n"); printf("\\plotheading {$T=%.4f$}\n", temptr); printf("\\putrule from -6 0 to 6 0\n"); printf("\\putrule from 0 -6 to 0 6\n"); printf("\\put {$\\bullet$} at 0 0\n"); printf("\\put {$\\bullet$} at 2 2\n"); printf("\\plot"); //Note: plotting should be fixed for(i=1; i<=ndim+1; i++) //for the 4 parameters!!! printf(" %f %f ", p[i][1], p[i][2]); printf(" %f %f /\n", p[1][1], p[1][2]); for(i=1; i<=ndim+1; i++) printf("\\put {%.3g} at %f %f\n", y[i], p[i][1], p[i][2]); printf("\\endpicture $$\n\n"); */ } // ADRIENNE KEEN, APRIL 2011.