Skip to content

Commit

Permalink
Merge pull request #5 from balhoff/categorical-data-enhancements
Browse files Browse the repository at this point in the history
Categorical data enhancements
  • Loading branch information
rvosa committed Sep 1, 2012
2 parents 0c4fc39 + 3cdd934 commit 04358b6
Show file tree
Hide file tree
Showing 14 changed files with 5,999 additions and 164 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.DS_Store
.classpath
.project
.settings
bin
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import org.nexml.model.Annotatable;
import org.nexml.model.CategoricalMatrix;
import org.nexml.model.Character;
import org.nexml.model.CompoundCharacterState;
import org.nexml.model.ContinuousMatrix;
import org.nexml.model.Matrix;
import org.nexml.model.MatrixCell;
import org.nexml.model.MolecularMatrix;
import org.nexml.model.OTU;
import org.nexml.model.OTUs;
import org.nexml.model.UncertainCharacterState;

/**
* @author rvosa
Expand Down Expand Up @@ -70,7 +72,7 @@ else if ( xmlMatrix instanceof MolecularMatrix ) {
return null;
}
}

/**
*
* @param mesDataType
Expand All @@ -90,21 +92,48 @@ private FileElement readMatrix(String mesDataType,Matrix<?> xmlMatrix,MesquiteFi
for ( Character xmlCharacter : xmlCharacterList ) {
CharacterState mesCS = null;
MatrixCell<?> xmlCell = xmlMatrix.getCell(xmlOTU, xmlCharacter);
if ( mesMatrix instanceof ContinuousData ) {
Double xmlDouble = (Double)xmlCell.getValue();
if ( xmlDouble != null ) {
mesCS = new ContinuousState(xmlDouble);
((ContinuousState)mesCS).setNumItems(1); // XXX for multidimensional matrices
}
}
else {
org.nexml.model.CharacterState xmlState = (org.nexml.model.CharacterState)xmlCell.getValue();
if ( xmlState != null ) {
mesCS = new CategoricalState();
String xmlSymbol = xmlState.getSymbol().toString();
mesCS.setValue(xmlSymbol, mesMatrix);
}
}
if ( mesMatrix instanceof ContinuousData ) {
Double xmlDouble = (Double)xmlCell.getValue();
if ( xmlDouble != null ) {
mesCS = new ContinuousState(xmlDouble);
((ContinuousState)mesCS).setNumItems(1); // XXX for multidimensional matrices
}
}
else {
if (xmlMatrix instanceof CategoricalMatrix) {
for (org.nexml.model.CharacterState state : xmlCharacter.getCharacterStateSet().getCharacterStates()) {
if (!(state instanceof CompoundCharacterState)) {
String label = state.getLabel();
if ((null != label) && (!label.equals("")) && (mesMatrix instanceof CategoricalData)) {
int stateIndex = Integer.parseInt(state.getSymbol().toString());
((CategoricalData)mesMatrix).setStateName(mesCharacter, stateIndex, label);
}
}
}
}
org.nexml.model.CharacterState xmlState = (org.nexml.model.CharacterState)xmlCell.getValue();
if ( xmlState != null ) {
String xmlSymbol = xmlState.getSymbol().toString();
if (xmlMatrix instanceof CategoricalMatrix) {
long stateValue = CategoricalState.emptySet();
if (xmlState instanceof CompoundCharacterState) {
for (org.nexml.model.CharacterState state : ((CompoundCharacterState)xmlState).getStates()) {
int memberSymbol = Integer.parseInt(state.getSymbol().toString());
stateValue = CategoricalState.addToSet(stateValue, memberSymbol);
}
if (xmlState instanceof UncertainCharacterState) {
stateValue = CategoricalState.setUncertainty(stateValue, true);
}
} else {
stateValue = CategoricalState.makeSet(Integer.parseInt(xmlSymbol));
}
mesCS = new CategoricalState(stateValue);
} else {
mesCS = new CategoricalState();
mesCS.setValue(xmlSymbol, mesMatrix);
}
}
}
if ( mesCS != null ) {
mesMatrix.setState(mesCharacter, mesTaxon, mesCS);
//can add in character state stuff here
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import mesquite.categ.lib.CategoricalData;
import mesquite.categ.lib.CategoricalState;
import mesquite.categ.lib.DNAData;
import mesquite.categ.lib.ProteinData;
import mesquite.categ.lib.RNAData;
Expand All @@ -22,23 +25,31 @@
import org.nexml.model.CategoricalMatrix;
import org.nexml.model.Character;
import org.nexml.model.CharacterStateSet;
import org.nexml.model.CompoundCharacterState;
import org.nexml.model.Document;
import org.nexml.model.Matrix;
import org.nexml.model.MatrixCell;
import org.nexml.model.MolecularMatrix;
import org.nexml.model.NexmlWritable;
import org.nexml.model.OTU;
import org.nexml.model.OTUs;
import org.nexml.model.PolymorphicCharacterState;
import org.nexml.model.UncertainCharacterState;

public class NexmlCharactersBlockWriter extends NexmlBlockWriter {


/**
* Generate symbols for uncertainties and polymorphisms that don't conflict with existing state symbols.
* <p>
* The counter starts at one more than {@code CategoricalState.getMaxPossibleStateStatic()} and is
* post-incremented by {@code findOrCreateUncertainStateSet} and {@code findOrCreatePolymorphicStateSet}
* each time one of them has to create a new compound state. It is an instance field, so the same
* sequence is shared by every state set this writer instance handles.
*/
private int nextMultipleStateSymbol = CategoricalState.getMaxPossibleStateStatic() + 1;

/**
 * Lookup table from a Mesquite molecular data type name (DNA, RNA, protein) to the
 * corresponding NeXML {@code MolecularMatrix} type constant. Data types that are not
 * keys of this map are not molecular and are handled by other branches of the writer.
 */
private static final Map<String, String> xmlMolecularDataTypeFor = buildMolecularDataTypeMap();

/**
 * Builds the Mesquite-to-NeXML molecular data type table.
 * <p>
 * A plain {@code HashMap} is populated here instead of using double-brace initialization,
 * which would create an anonymous {@code HashMap} subclass (and with it a serialization
 * warning that had to be suppressed).
 *
 * @return a map keyed by Mesquite data type name
 */
private static Map<String, String> buildMolecularDataTypeMap() {
    Map<String, String> typeFor = new HashMap<String, String>();
    typeFor.put(DNAData.DATATYPENAME, MolecularMatrix.DNA);
    typeFor.put(RNAData.DATATYPENAME, MolecularMatrix.RNA);
    typeFor.put(ProteinData.DATATYPENAME, MolecularMatrix.Protein);
    return typeFor;
}

/**
*
* @param employerEmployee
Expand All @@ -57,66 +68,101 @@ protected Annotatable writeBlock(Document xmlProject, FileElement mesBlock) {
Taxa mesTaxa = mesData.getTaxa();
OTUs xmlTaxa = findEquivalentTaxa(mesTaxa,xmlProject);
org.nexml.model.Matrix<?> xmlMatrix = null;
CharacterStateSet xmlCharacterStateSet = null;
String mesDataType = mesData.getDataTypeName();
if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
xmlMatrix = xmlProject.createMolecularMatrix(xmlTaxa,xmlMolecularDataTypeFor.get(mesDataType));
xmlCharacterStateSet = ((MolecularMatrix)xmlMatrix).getCharacterStateSet();
}
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
xmlMatrix = xmlProject.createCategoricalMatrix(xmlTaxa);
xmlCharacterStateSet = ((CategoricalMatrix)xmlMatrix).createCharacterStateSet();
}
else if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
xmlMatrix = xmlProject.createContinuousMatrix(xmlTaxa);
}
else {
MesquiteMessage.warnProgrammer("Can't write data type "+mesDataType);
}
writeCharacterStates(mesData, xmlMatrix, xmlCharacterStateSet);
}
writeCharacterStates(mesData, xmlMatrix);
return xmlMatrix;
}

/**
*
* @param mesData
* @param xmlMatrix
* @param xmlCharacterStateSet
*/
@SuppressWarnings("unchecked")
private void writeCharacterStates(CharacterData mesData, org.nexml.model.Matrix<?> xmlMatrix, CharacterStateSet xmlCharacterStateSet) {
private void writeCharacterStates(CharacterData mesData, org.nexml.model.Matrix<?> xmlMatrix) {
String mesDataType = mesData.getDataTypeName();
int mesNchar = mesData.getNumChars();
List<Character> xmlCharacters = new ArrayList<Character>(mesNchar);
for ( int j = 0; j < mesNchar; j++ ) {
for ( int characterIndex = 0; characterIndex < mesNchar; characterIndex++ ) {
CharacterStateSet xmlCharacterStateSet = null;
if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
xmlCharacterStateSet = ((MolecularMatrix)xmlMatrix).getCharacterStateSet();
}
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
xmlCharacterStateSet = ((CategoricalMatrix)xmlMatrix).createCharacterStateSet();
}
Character xmlChar = xmlMatrix.createCharacter(xmlCharacterStateSet);
String mesCharacterName = mesData.getCharacterName(j);
String mesCharacterName = mesData.getCharacterName(characterIndex);
if ( null != mesCharacterName && ! mesCharacterName.equals("") ) {
xmlChar.setLabel(mesCharacterName);
}
if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
CategoricalData data = ((CategoricalData)mesData);
int maxStateIndex = data.maxStateWithName(characterIndex);
for (int stateIndex = 0; stateIndex <= maxStateIndex; stateIndex++) {
String symbol = String.valueOf(data.getSymbol(stateIndex));
org.nexml.model.CharacterState state = xmlChar.getCharacterStateSet().createCharacterState(symbol);
state.setSymbol(symbol);
if (data.hasStateName(characterIndex, stateIndex)) {
String stateLabel = data.getStateName(characterIndex, stateIndex);
state.setLabel(stateLabel);
}
}
}
xmlCharacters.add(xmlChar);
}
for ( int j = 0; j < mesData.getNumTaxa(); j++ ) {
CharacterState[] mesChars = mesData.getCharacterStateArray(j, 0, mesNchar);
Taxon mesTaxon = mesData.getTaxa().getTaxon(j);
for (int taxonIndex = 0; taxonIndex < mesData.getNumTaxa(); taxonIndex++) {
CharacterState[] mesCharStates = mesData.getCharacterStateArray(taxonIndex, 0, mesNchar);
Taxon mesTaxon = mesData.getTaxa().getTaxon(taxonIndex);
OTU xmlTaxon = findEquivalentTaxon(mesTaxon,xmlMatrix.getOTUs());
for ( int k = 0; k < mesNchar; k++ ) {
Character xmlChar = xmlCharacters.get(k);
String mesCharString = mesChars[k].toDisplayString();
if ( mesCharString != null && !mesCharString.equals("-") ) {
if ( mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME) ) {
MatrixCell<Double> xmlCell = (MatrixCell<Double>) xmlMatrix.getCell(xmlTaxon,xmlChar);
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesCharString));
for ( int characterIndex = 0; characterIndex < mesNchar; characterIndex++ ) {
Character xmlChar = xmlCharacters.get(characterIndex);
CharacterState mesState = mesCharStates[characterIndex];
if (mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME)) {
CharacterStateSet xmlStateSet = xmlChar.getCharacterStateSet();
CategoricalData categoricalData = (CategoricalData)mesData;
long stateAssignment = categoricalData.getState(characterIndex, taxonIndex);
org.nexml.model.CharacterState xmlCharacterState = null;
if (CategoricalState.hasMultipleStates(stateAssignment)) {
Set<String> symbols = new HashSet<String>();
for (int mesStateCode : CategoricalState.expand(stateAssignment)) {
symbols.add(String.valueOf(categoricalData.getSymbol(mesStateCode)));
}
if (CategoricalState.isUncertain(stateAssignment)) {
xmlCharacterState = findOrCreateUncertainStateSet(xmlStateSet, symbols);
} else { //polymorphic
xmlCharacterState = findOrCreatePolymorphicStateSet(xmlStateSet, symbols);
}
} else { // single state
if ((!CategoricalState.isUnassigned(stateAssignment)) && (!CategoricalState.isInapplicable(stateAssignment))) {
String symbol = String.valueOf(categoricalData.getSymbol(CategoricalState.getOnlyElement(stateAssignment)));
xmlCharacterState = xmlStateSet.lookupCharacterStateBySymbol(symbol);
}
}
else if ( mesDataType.equalsIgnoreCase(CategoricalData.DATATYPENAME) ) {
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
xmlCell.setValue((org.nexml.model.CharacterState)xmlMatrix.parseSymbol(mesCharString));
if (xmlCharacterState != null) {
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon, xmlChar);
xmlCell.setValue(xmlCharacterState);
}
else if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
xmlCell.setValue((org.nexml.model.CharacterState)((MolecularMatrix)xmlMatrix).parseSymbol(mesCharString,xmlMolecularDataTypeFor.get(mesDataType)));
}
}
} else if (mesDataType.equalsIgnoreCase(ContinuousData.DATATYPENAME)) {
MatrixCell<Double> xmlCell = (MatrixCell<Double>) xmlMatrix.getCell(xmlTaxon,xmlChar);
xmlCell.setValue((Double)xmlMatrix.parseSymbol(mesState.toDisplayString(), xmlChar));
} else if ( xmlMolecularDataTypeFor.containsKey(mesDataType) ) {
MatrixCell<org.nexml.model.CharacterState> xmlCell = (MatrixCell<org.nexml.model.CharacterState>) xmlMatrix.getCell(xmlTaxon,xmlChar);
xmlCell.setValue((org.nexml.model.CharacterState)((MolecularMatrix)xmlMatrix).parseSymbol(mesState.toDisplayString(), xmlMolecularDataTypeFor.get(mesDataType)));
}
}
}
}
Expand All @@ -131,4 +177,51 @@ protected Annotatable getThingInXmlBlock(NexmlWritable xmlBlock, int index) {
return xmlMatrix.getCharacters().get(index);
}

/**
 * Returns the uncertain compound state of {@code containingStateSet} whose members carry
 * exactly the given symbols, creating such a state when none exists yet.
 *
 * @param containingStateSet the state set to search and, if necessary, extend
 * @param symbols the symbols of the member states making up the uncertainty
 * @return the existing or newly created uncertain state
 */
private UncertainCharacterState findOrCreateUncertainStateSet(CharacterStateSet containingStateSet, Set<String> symbols) {
    for (org.nexml.model.CharacterState candidate : containingStateSet.getCharacterStates()) {
        if (!(candidate instanceof UncertainCharacterState)) {
            continue;
        }
        UncertainCharacterState existing = (UncertainCharacterState) candidate;
        if (containsMatchingStates(existing, symbols)) {
            return existing;
        }
    }
    // No reusable state: gather (or create) the members first, then claim a fresh symbol.
    Set<org.nexml.model.CharacterState> members = collectMatchingStates(containingStateSet, symbols);
    int freshSymbol = this.nextMultipleStateSymbol++;
    return containingStateSet.createUncertainCharacterState(freshSymbol, members);
}

/**
 * Returns the polymorphic compound state of {@code containingStateSet} whose members carry
 * exactly the given symbols, creating such a state when none exists yet.
 *
 * @param containingStateSet the state set to search and, if necessary, extend
 * @param symbols the symbols of the member states making up the polymorphism
 * @return the existing or newly created polymorphic state
 */
private PolymorphicCharacterState findOrCreatePolymorphicStateSet(CharacterStateSet containingStateSet, Set<String> symbols) {
    for (org.nexml.model.CharacterState candidate : containingStateSet.getCharacterStates()) {
        if (!(candidate instanceof PolymorphicCharacterState)) {
            continue;
        }
        PolymorphicCharacterState existing = (PolymorphicCharacterState) candidate;
        if (containsMatchingStates(existing, symbols)) {
            return existing;
        }
    }
    // No reusable state: gather (or create) the members first, then claim a fresh symbol.
    Set<org.nexml.model.CharacterState> members = collectMatchingStates(containingStateSet, symbols);
    int freshSymbol = this.nextMultipleStateSymbol++;
    return containingStateSet.createPolymorphicCharacterState(freshSymbol, members);
}

/**
 * Tests whether the member states of a compound state carry exactly the given symbols.
 *
 * @param state the compound state whose members are inspected
 * @param symbols the symbol set to compare against
 * @return {@code true} if the set of member symbols equals {@code symbols}
 */
private boolean containsMatchingStates(CompoundCharacterState state, Set<String> symbols) {
    Set<String> memberSymbols = new HashSet<String>();
    for (org.nexml.model.CharacterState member : state.getStates()) {
        String memberSymbol = member.getSymbol().toString();
        memberSymbols.add(memberSymbol);
    }
    boolean sameSymbols = memberSymbols.equals(symbols);
    return sameSymbols;
}

/**
 * Resolves each symbol to a state of {@code containingStateSet}, creating a new state in
 * that set for any symbol that has no state yet.
 *
 * @param containingStateSet the state set to look symbols up in and to extend when needed
 * @param symbols the symbols to resolve
 * @return the states corresponding to the given symbols
 */
private Set<org.nexml.model.CharacterState> collectMatchingStates(CharacterStateSet containingStateSet, Set<String> symbols) {
    Set<org.nexml.model.CharacterState> resolved = new HashSet<org.nexml.model.CharacterState>();
    for (String wanted : symbols) {
        org.nexml.model.CharacterState found = containingStateSet.lookupCharacterStateBySymbol(wanted);
        // Only create a state when the lookup came back empty.
        resolved.add(found != null ? found : containingStateSet.createCharacterState(wanted));
    }
    return resolved;
}

}
2 changes: 1 addition & 1 deletion src/org/nexml/model/Matrix.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public interface Matrix<T> extends OTUsLinkable, Annotatable, Segmented<Characte
* @param symbol
* @return
*/
T parseSymbol(String symbol);
T parseSymbol(String symbol, Character character);

/**
* Creates a row element for OTU otu, and populates
Expand Down
3 changes: 2 additions & 1 deletion src/org/nexml/model/impl/AnnotationImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ public void setValue(Set<Annotation> value) {
mValue = value;
getElement().setAttribute(XSI_TYPE,ResourceMeta);
for ( Annotation annotation : value ) {
getElement().appendChild(((AnnotationImpl)annotation).getElement());
Node node = getElement().getOwnerDocument().adoptNode(((AnnotationImpl)annotation).getElement());
getElement().appendChild(node);
}
}

Expand Down
Loading

0 comments on commit 04358b6

Please sign in to comment.