-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFramework.H
71 lines (67 loc) · 3.04 KB
/
Framework.H
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Include guard. The macro tested by #ifndef must be the same macro that is
// defined on the next line; otherwise a second inclusion of this header in one
// translation unit redefines class Framework.
// NOTE(review): identifiers that start with an underscore followed by an
// uppercase letter are reserved to the implementation; the existing guard name
// is kept here so nothing else that may test it is affected.
#ifndef _FRAMEWORK_
#define _FRAMEWORK_

#include "gsl/gsl_randist.h"
#include <unistd.h>
#include <map>
#include <string>
#include <vector>

/* The approach assumes that there is a mapping of the genes across species:
 * cluster k in species 1 corresponds to cluster k in every other species. */

// NOTE(review): these forward declarations are not sufficient on their own.
// Framework holds SpeciesDistManager, SpeciesClusterManager and
// MappedOrthogroupReader by value and names the nested type
// SpeciesDistManager::Species, all of which need the complete class
// definitions. Matrix and GammaManager are not declared in this header at all.
// The includer must therefore bring in the corresponding project headers
// before including this file.
class SpeciesDistManager;
class SpeciesClusterManager;
class GeneTreeManager;
class MappedOrthogroupReader;

// Top-level driver class. It owns the species-tree manager, the per-species
// clustering manager and the orthogroup reader, and declares the entry points
// for reading inputs, configuring parameters and running the clustering.
// All member functions are defined elsewhere; the meaning of the int return
// values is determined by those definitions.
class Framework
{
public:
    Framework();
    ~Framework();

    // Input readers.
    int readSpeciesData(const char*,const char*);
    int readSpeciesTree(int,const char*);
    int readOrthology(const char* specOrder, const char* orthomapfile);

    // Configuration setters.
    int setSrcSpecies(const char*);
    int setConstCov(const char*);

    // Main operations.
    int startClustering(const char*);
    int generateData(const char*);
    int redisplay(const char*);

    // Transition-probability settings. setClusterTransProb is overloaded on a
    // numeric value and on a C string.
    int setPdiagonalLeaf(double);
    int setPdiagonalNonLeaf(double);
    int setClusterTransProb(double);
    int setClusterTransProb(const char*);

private:
    int learnAncestralClusters();
    int initClusterTransitionProb();
    int initClusterTransitionProb(SpeciesDistManager::Species* anode);
    int initTransitionProb(Matrix* m,double);

    // This is all part of the expectation step.
    // Note: the clusterAssignments parameters below are keyed by int and
    // shadow the data member of the same name, which is keyed by std::string.
    int inferAncestralClusters(std::map<int,std::map<std::string,int>*>&);
    int inferExtantClusters(std::map<int,std::map<std::string,int>*>&);
    int estimateClusterTransProb(std::map<int,std::map<std::string,int>*>& clusterAssignments);
    int estimateClusterTransProb(SpeciesDistManager::Species* parent, SpeciesDistManager::Species* child, std::map<int,std::map<std::string,int>*>& clusterAssignments);
    int estimateTransitionMatrix(std::string& parentname,std::string& childname, SpeciesDistManager::Species* child,std::map<int,std::map<std::string,int>*>& clusterAssignments);

    // Maximization step. Not sure if we should estimate the cluster transition
    // probabilities here or not.
    int maximizationStep();

    // Convergence bookkeeping: both take the current and the previous
    // assignment maps.
    bool checkConvergence(std::map<int,std::map<std::string,int>*>& clusterAssignments,std::map<int,std::map<std::string,int>*>& oldclusterAssignments);
    int saveClusterAssignments(std::map<int,std::map<std::string,int>*>& clusterAssignments,std::map<int,std::map<std::string,int>*> &oldclusterAssignments);

    // Sampling helpers; each takes the GSL random number generator to use.
    int sampleAncestralCluster(gsl_rng* r,SpeciesDistManager::Species* root);
    int sampleChildCluster(gsl_rng* r,SpeciesDistManager::Species*,int,std::map<std::string,int>& clusterAssign);
    int sampleChildCluster(gsl_rng* r, int parentClusterId,Matrix* params,std::vector<int>*);

    int sortIndices(SpeciesDistManager::Species*);
    int sortIndices(Matrix* values,int rowID, std::vector<int>*);
    int dumpInferredAssignments(const char* outputDir, const char* suffix);

    // This is very much like the SpeciesDist, only it works with general
    // potentials rather than pairwise potentials.
    SpeciesDistManager sdMgr;
    // This is the wrapper over the species-specific clusterings.
    SpeciesClusterManager scMgr;
    GammaManager gammaMgr;
    // For now assume in each species we store the ML estimate of the cluster ID.
    std::map<std::string,std::map<std::string,int>*> clusterAssignments;
    MappedOrthogroupReader mor;
    int maxClusterCnt;
    int learnIter;
    // Fixed-size buffer (1024 bytes).
    // NOTE(review): presumably filled from a caller-supplied path; confirm the
    // copy is bounded.
    char outputDir[1024];
    // NOTE(review): raw pointer; ownership and release are not visible in this
    // header. If the destructor frees it, copying a Framework would be unsafe.
    gsl_rng* randnum;
    // Fixed-size buffer (256 bytes).
    // NOTE(review): presumably set by setSrcSpecies; confirm the copy is bounded.
    char srcSpecies[256];
    double constCov; // Constant covariance for experts (DC ADD)
    std::map<std::string,double> clusterTransitionProb;
    double p_diagonal_leaf;
    double p_diagonal_nonleaf;
};
#endif