ConvertMySQLToBerkeleyDB.java from Texai at Krugle
Show ConvertMySQLToBerkeleyDB.java syntax highlighted
/*
* ConvertMySQLToBerkeleyDB.java
*
* Created on April 6, 2007, 3:27 PM
*
* Description: Loads the Berkeley DB from the MySQL dump file.
*
* Copyright (C) 2007 Stephen L. Reed.
*
* This program is free software; you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with this program;
* if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.texai.kb.conversion;
import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentMutableConfig;
import com.sleepycat.je.TransactionConfig;
import com.sleepycat.persist.EntityStore;
import com.sleepycat.persist.StoreConfig;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.texai.kb.CacheInitializer;
import org.texai.kb.Constants;
import org.texai.kb.ejb.session.shared.TermFinderFacadeBean;
import org.texai.kb.ejb.session.shared.TermFinderFacadeLocal;
import org.texai.kb.entity.AtomicTerm;
import org.texai.kb.entity.BinaryGAF;
import org.texai.kb.entity.Formula;
import org.texai.kb.entity.NonAtomicTerm;
import org.texai.kb.entity.PDouble;
import org.texai.kb.entity.PLong;
import org.texai.kb.entity.PString;
import org.texai.kb.entity.PVariable;
import org.texai.kb.entity.QuaternaryGAF;
import org.texai.kb.entity.QuintaryGAF;
import org.texai.kb.entity.AbstractReifiedTerm;
import org.texai.kb.entity.Rule;
import org.texai.kb.entity.StubReifiedTerm;
import org.texai.kb.entity.StubTerm;
import org.texai.kb.entity.Symbol;
import org.texai.kb.entity.AbstractTerm;
import org.texai.kb.entity.TernaryGAF;
import org.texai.kb.entity.UnaryGAF;
import org.texai.kb.entity.helper.TermLoaderBean;
import org.texai.kb.entity.helper.TermLoaderLocal;
import org.texai.util.TexaiException;
/**
*
* @author reed
*/
public final class ConvertMySQLToBerkeleyDB {
/**
 * the conversion starting step; each convert method compares this value against its own step
 * number and either skips its work or reloads previously saved intermediate results when the
 * conversion is started from a later step
 */
// private static final int CONVERSION_STARTING_STEP = 100;
private static final int CONVERSION_STARTING_STEP = 1;
/** the log4j logger */
private static final Logger LOGGER = Logger.getLogger(ConvertMySQLToBerkeleyDB.class.getName());
/** the term type array path */
private static final String TERM_TYPE_ARRAY_PATH = "/home/reed/temp/termTypeArray.csv";
/** the abstract terms path */
private static final String ABSTRACT_TERMS_PATH = "/home/reed/temp/AbstractTerm.csv";
/** the time point dictionary entries path */
private static final String TIME_POINT_DICTIONARY_ENTRIES_PATH = "/home/reed/temp/TimePointDictionaryEntries.csv";
/** the time points path */
private static final String TIME_POINTS_PATH = "/home/reed/temp/TimePoint.csv";
/** the symbols path */
private static final String SYMBOLS_PATH = "/home/reed/temp/Symbol.csv";
/** the variables path */
private static final String VARIABLES_PATH = "/home/reed/temp/PVariable.csv";
/** the longs path */
private static final String LONGS_PATH = "/home/reed/temp/PLong.csv";
/** the doubles path */
private static final String DOUBLES_PATH = "/home/reed/temp/PDouble.csv";
/** the atomic terms path */
private static final String ATOMIC_TERMS_PATH = "/home/reed/temp/AtomicTerm.csv";
/** the formulas path */
private static final String FORMULAS_PATH = "/home/reed/temp/Formula.csv";
/** the non-atomic terms path */
private static final String NON_ATOMIC_TERMS_PATH = "/home/reed/temp/NonAtomicTerm.csv";
/** the rules path */
private static final String RULES_PATH = "/home/reed/temp/Rule.csv";
/** the unary GAFs path */
private static final String UNARY_GAFS_PATH = "/home/reed/temp/UnaryGAF.csv";
/** the binary GAFs path */
private static final String BINARY_GAFS_PATH = "/home/reed/temp/BinaryGAF.csv";
/** the ternary GAFs path */
private static final String TERNARY_GAFS_PATH = "/home/reed/temp/TernaryGAF.csv";
/** the quaternary GAFs path */
private static final String QUATERNARY_GAFS_PATH = "/home/reed/temp/QuaternaryGAF.csv";
/** the quintary GAFs path */
private static final String QUINTARY_GAFS_PATH = "/home/reed/temp/QuintaryGAF.csv";
/** the new term id array path */
private static final String NEW_TERM_ID_ARRAY_PATH = "/home/reed/temp/newTermIdArray.csv";
/** the time point term type value, used alongside the Constants term type values in the term type array */
private static final byte TIME_POINT = 20;
/** the MySQL database connection */
private Connection connection;
/** the MySQL statement */
private Statement statement;
/** the array of term types which is indexed by the MySQL term id */
final byte[] termTypeArray = new byte[35378928];
/** the term id cross reference, which is indexed by the MySQL term id; a zero entry means no new term id has been recorded */
final int[] newTermIdArray = new int[35378928];
/** the dictionary of time points, term id --> time milliseconds */
final Map<Integer, Long> timePointDictionary = new HashMap<Integer, Long>();
/** the cache of reified terms that populate creator and creation purpose, keyed by the MySQL term id */
final Map<Integer, AbstractReifiedTerm> reifiedTermCache = new HashMap<Integer, AbstractReifiedTerm>();
/** the Oracle Berkeley DB environment */
private Environment environment;
/** the entity store */
private EntityStore entityStore;
/** the home directory of the Oracle Berkeley DB */
private File envHome = new File(Constants.ORACLE_BERKELEY_DB_HOME);
/** the transaction configuration */
private final TransactionConfig transactionConfig = new TransactionConfig();
/** the term finder facade */
private TermFinderFacadeLocal termFinderFacade;
/** the string builder */
private StringBuilder stringBuilder = new StringBuilder(Constants.STRING_BUILDER_SIZE);
/** the number of items processed; reset to zero at the start of each conversion step */
private int nbrItemsProcessed = 0;
/** Creates a new instance of ConvertMySQLToBerkeleyDB. */
public ConvertMySQLToBerkeleyDB() {
super();
}
/**
 * Initializes this application.
 *
 * Opens the Oracle Berkeley DB environment and entity store in read-write, transactional mode,
 * wires up the term loader and the term finder facade, and initializes the caches. When the
 * conversion is resumed from a later step (CONVERSION_STARTING_STEP > 1), the old-to-new term id
 * cross reference is reloaded from NEW_TERM_ID_ARRAY_PATH; a missing file is logged and tolerated.
 *
 * @throws TexaiException if the database cannot be opened or the cross reference file cannot be read
 */
private void initialize() {
    LOGGER.info("initializing Oracle Berkeley DB Java Edition");
    final EnvironmentConfig environmentConfig = new EnvironmentConfig();
    final StoreConfig storeConfig = new StoreConfig();
    final boolean readOnly = false;
    environmentConfig.setReadOnly(readOnly);
    storeConfig.setReadOnly(readOnly);
    environmentConfig.setAllowCreate(!readOnly);
    storeConfig.setAllowCreate(!readOnly);
    environmentConfig.setTransactional(!readOnly);
    storeConfig.setTransactional(!readOnly);
    try {
        environment = new Environment(envHome, environmentConfig);
        entityStore = new EntityStore(environment, "EntityStore", storeConfig);
        final EnvironmentMutableConfig environmentMutableConfig = new EnvironmentMutableConfig();
        environmentMutableConfig.setTxnNoSync(true);
        environment.setMutableConfig(environmentMutableConfig);
    } catch (final DatabaseException ex) {
        throw new TexaiException(ex);
    }
    transactionConfig.setReadUncommitted(true);
    final TermLoaderLocal termLoader = new TermLoaderBean();
    termLoader.setEntityStore(entityStore);
    termLoader.initializeIndices();
    termFinderFacade = new TermFinderFacadeBean();
    termFinderFacade.setEntityStore(entityStore);
    termFinderFacade.initializeIndices();
    termFinderFacade.setTermLoader(termLoader);
    CacheInitializer.initializeCaches();
    if (CONVERSION_STARTING_STEP > 1) {
        LOGGER.info("loading the new term id array");
        try {
            final BufferedReader bufferedReader = new BufferedReader(new FileReader(NEW_TERM_ID_ARRAY_PATH));
            // close the reader even when a line fails to parse
            try {
                nbrItemsProcessed = 0;
                while (true) {
                    final String line = bufferedReader.readLine();
                    if (line == null) {
                        break;
                    }
                    // each line has the form: <MySQL term id>,<new term id>
                    final Object[] termIdResults = parseInteger(line);
                    final int termId = (Integer) termIdResults[0];
                    String remainingLine = (String) termIdResults[1];
                    remainingLine = parseComma(remainingLine);
                    final Object[] newTermIdResults = parseInteger(remainingLine);
                    final int newTermId = ((Integer) newTermIdResults[0]);
                    newTermIdArray[termId] = newTermId;
                    nbrItemsProcessed++;
                    if (nbrItemsProcessed < 10) {
                        LOGGER.info(" " + termId + "," + newTermId);
                    }
                }
            } finally {
                bufferedReader.close();
            }
        } catch (final FileNotFoundException ex) {
            // the cross reference file is optional, so the conversion carries on without it
            LOGGER.info(NEW_TERM_ID_ARRAY_PATH + " file not found, continuing");
        } catch (final IOException ex) {
            throw new TexaiException(ex);
        }
        LOGGER.info("loaded " + nbrItemsProcessed + " items");
    }
}
/**
 * Converts the abstract terms.
 *
 * When the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 1), the term type
 * array is reloaded from TERM_TYPE_ARRAY_PATH and nothing else is done. Otherwise each line of
 * ABSTRACT_TERMS_PATH (a term type name, a comma, and a term id) is parsed, the matching term type
 * value is recorded in the term type array at the index given by the term id, and the whole array
 * is then written to TERM_TYPE_ARRAY_PATH as "index,type" lines.
 *
 * @throws TexaiException if a file cannot be found, read or written
 */
private void convertAbstractTerms() {
    if (CONVERSION_STARTING_STEP > 1) {
        LOGGER.info("loading the term type array");
        try {
            final BufferedReader bufferedReader = new BufferedReader(new FileReader(TERM_TYPE_ARRAY_PATH));
            // close the reader even when a line fails to parse
            try {
                nbrItemsProcessed = 0;
                while (true) {
                    final String line = bufferedReader.readLine();
                    if (line == null) {
                        break;
                    }
                    final Object[] termIdResults = parseInteger(line);
                    final Integer termId = (Integer) termIdResults[0];
                    String remainingLine = (String) termIdResults[1];
                    remainingLine = parseComma(remainingLine);
                    final Object[] termTypeResults = parseInteger(remainingLine);
                    final byte termType = ((Integer) termTypeResults[0]).byteValue();
                    termTypeArray[termId] = termType;
                    nbrItemsProcessed++;
                    if (nbrItemsProcessed < 10) {
                        LOGGER.info(" " + termId + "," + termType);
                    }
                }
            } finally {
                bufferedReader.close();
            }
        } catch (final FileNotFoundException ex) {
            throw new TexaiException(ex);
        } catch (final IOException ex) {
            throw new TexaiException(ex);
        }
        LOGGER.info("loaded " + nbrItemsProcessed + " items");
        return;
    }
    LOGGER.info("converting abstract terms");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(ABSTRACT_TERMS_PATH));
        // close the reader even when a line fails to parse
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                final Object[] termTypeResults = parseString(line);
                final String termTypeString = (String) termTypeResults[0];
                String remainingLine = (String) termTypeResults[1];
                // map the term type name to its byte value; zero means the name was not recognized
                byte termType = 0;
                if ("Symbol".equals(termTypeString)) {
                    termType = Constants.SYMBOL;
                } else if ("PVariable".equals(termTypeString)) {
                    termType = Constants.VARIABLE;
                } else if ("PLong".equals(termTypeString)) {
                    termType = Constants.LONG;
                } else if ("PDouble".equals(termTypeString)) {
                    termType = Constants.DOUBLE;
                } else if ("PString".equals(termTypeString)) {
                    termType = Constants.STRING;
                } else if ("TimePoint".equals(termTypeString)) {
                    termType = TIME_POINT;
                } else if ("AtomicTerm".equals(termTypeString)) {
                    termType = Constants.ATOMIC;
                } else if ("NonAtomicTerm".equals(termTypeString)) {
                    termType = Constants.NON_ATOMIC;
                } else if ("Formula".equals(termTypeString)) {
                    termType = Constants.FORMULA;
                } else if ("Rule".equals(termTypeString)) {
                    termType = Constants.RULE;
                } else if ("UnaryGAF".equals(termTypeString)) {
                    termType = Constants.UNARY_GAF;
                } else if ("BinaryGAF".equals(termTypeString)) {
                    termType = Constants.BINARY_GAF;
                } else if ("TernaryGAF".equals(termTypeString)) {
                    termType = Constants.TERNARY_GAF;
                } else if ("QuaternaryGAF".equals(termTypeString)) {
                    termType = Constants.QUATERNARY_GAF;
                } else if ("QuintaryGAF".equals(termTypeString)) {
                    termType = Constants.QUINTARY_GAF;
                }
                assert termType != 0 : "termType must not be zero";
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                assert ((String) termIdResults[1]).isEmpty() : "expected an empty string";
                final Integer termId = (Integer) termIdResults[0];
                termTypeArray[termId] = termType;
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
    try {
        LOGGER.info("storing the term type array");
        final BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(TERM_TYPE_ARRAY_PATH));
        // close (and thereby flush) the writer even when a write fails
        try {
            for (int i = 0; i < termTypeArray.length; i++) {
                bufferedWriter.write(i + "," + termTypeArray[i]);
                bufferedWriter.newLine();
            }
        } finally {
            bufferedWriter.close();
        }
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
}
/**
 * Converts the time points.
 *
 * When the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 2), the time point
 * dictionary is reloaded from TIME_POINT_DICTIONARY_ENTRIES_PATH and nothing else is done.
 * Otherwise each line of TIME_POINTS_PATH (an integer id, a timestamp string, and a term id) is
 * parsed, the timestamp is converted to milliseconds and stored in the time point dictionary under
 * the term id, and the dictionary is then written to TIME_POINT_DICTIONARY_ENTRIES_PATH as
 * "termId,milliseconds" lines.
 *
 * @throws TexaiException if a file cannot be found, read or written
 */
private void convertTimePoints() {
    if (CONVERSION_STARTING_STEP > 2) {
        LOGGER.info("loading the time point dictionary entries");
        try {
            final BufferedReader bufferedReader = new BufferedReader(new FileReader(TIME_POINT_DICTIONARY_ENTRIES_PATH));
            // close the reader even when a line fails to parse
            try {
                nbrItemsProcessed = 0;
                while (true) {
                    final String line = bufferedReader.readLine();
                    if (line == null) {
                        break;
                    }
                    final Object[] termIdResults = parseInteger(line);
                    final Integer termId = (Integer) termIdResults[0];
                    String remainingLine = (String) termIdResults[1];
                    remainingLine = parseComma(remainingLine);
                    final Object[] timePointValueResults = parseLong(remainingLine);
                    final Long timePointValue = ((Long) timePointValueResults[0]);
                    assert termTypeArray[termId] == TIME_POINT : "invalid time point at termId: " + termId + ", line: " + line;
                    timePointDictionary.put(termId, timePointValue);
                    nbrItemsProcessed++;
                    if (nbrItemsProcessed < 10) {
                        LOGGER.info(" " + termId + "," + timePointValue);
                    }
                }
            } finally {
                bufferedReader.close();
            }
        } catch (final FileNotFoundException ex) {
            throw new TexaiException(ex);
        } catch (final IOException ex) {
            throw new TexaiException(ex);
        }
        LOGGER.info("loaded " + nbrItemsProcessed + " items");
        return;
    }
    LOGGER.info("converting time points");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(TIME_POINTS_PATH));
        // close the reader even when a line fails to parse
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] timePointInfoIdResults = parseInteger(line);
                String remainingLine = (String) timePointInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] timePointValueResults = parseString(remainingLine);
                final String dateString = (String) timePointValueResults[0];
                final Long timePointValue = java.sql.Timestamp.valueOf(dateString).getTime();
                remainingLine = (String) timePointValueResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                assert termTypeArray[termId] == TIME_POINT : "invalid time point at termId: " + termId + ", line: " + line;
                timePointDictionary.put(termId, timePointValue);
                nbrItemsProcessed++;
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + dateString + " " + timePointValue);
                }
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
    try {
        LOGGER.info("storing the time point dictionary entries");
        final BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(TIME_POINT_DICTIONARY_ENTRIES_PATH));
        // close (and thereby flush) the writer even when a write fails
        try {
            for (final Entry<Integer, Long> entry : timePointDictionary.entrySet()) {
                bufferedWriter.write(entry.getKey() + "," + entry.getValue());
                bufferedWriter.newLine();
            }
        } finally {
            bufferedWriter.close();
        }
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
}
/**
 * Converts the symbols.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 3).
 * Otherwise each line of SYMBOLS_PATH (an integer id, a name string, and a term id) is parsed,
 * the symbol is found or created through the term finder facade, and its new term id is recorded
 * in the term id cross reference at the index given by the MySQL term id.
 *
 * @throws TexaiException if the file cannot be found or read
 */
private void convertSymbols() {
    if (CONVERSION_STARTING_STEP > 3) {
        return;
    }
    LOGGER.info("converting symbols");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(SYMBOLS_PATH));
        // close the reader even when a line fails to parse or the facade throws
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] symbolInfoIdResults = parseInteger(line);
                String remainingLine = (String) symbolInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] nameValueResults = parseString(remainingLine);
                final String nameValue = (String) nameValueResults[0];
                remainingLine = (String) nameValueResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                assert termTypeArray[termId] == Constants.SYMBOL : "invalid symbol at termId: " + termId + ", line: " + line;
                final Symbol symbol = termFinderFacade.findOrCreateSymbolByNameValue(nameValue);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + symbol);
                }
                assert symbol.getTermId() != 0 : "termId must not be zero";
                newTermIdArray[termId] = symbol.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the variables.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 4).
 * Otherwise each line of VARIABLES_PATH (an integer id, a name string, and a term id) is parsed,
 * the variable is found or created through the term finder facade, and its new term id is
 * recorded in the term id cross reference at the index given by the MySQL term id.
 *
 * @throws TexaiException if the file cannot be found or read
 */
private void convertVariables() {
    if (CONVERSION_STARTING_STEP > 4) {
        return;
    }
    LOGGER.info("converting variables");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(VARIABLES_PATH));
        // close the reader even when a line fails to parse or the facade throws
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] pVariableInfoIdResults = parseInteger(line);
                String remainingLine = (String) pVariableInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] nameValueResults = parseString(remainingLine);
                final String nameValue = (String) nameValueResults[0];
                remainingLine = (String) nameValueResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                assert termTypeArray[termId] == Constants.VARIABLE : "invalid variable at termId: " + termId + ", line: " + line;
                final PVariable pVariable = termFinderFacade.findOrCreatePVariableByNameValue(nameValue);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + pVariable);
                }
                assert pVariable.getTermId() != 0 : "termId must not be zero";
                newTermIdArray[termId] = pVariable.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the longs.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 5).
 * Otherwise each line of LONGS_PATH (an integer id, a long value, and a term id) is parsed, the
 * long term is found or created through the term finder facade, and its new term id is recorded
 * in the term id cross reference at the index given by the MySQL term id.
 *
 * @throws TexaiException if the file cannot be found or read
 */
private void convertLongs() {
    if (CONVERSION_STARTING_STEP > 5) {
        return;
    }
    LOGGER.info("converting longs");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(LONGS_PATH));
        // close the reader even when a line fails to parse or the facade throws
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] pLongInfoIdResults = parseInteger(line);
                String remainingLine = (String) pLongInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] longValueResults = parseLong(remainingLine);
                final Long longValue = (Long) longValueResults[0];
                remainingLine = (String) longValueResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                assert termTypeArray[termId] == Constants.LONG : "invalid long at termId: " + termId + ", line: " + line;
                final PLong pLong = termFinderFacade.findOrCreatePLongByLongValue(longValue);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + pLong);
                }
                newTermIdArray[termId] = pLong.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the doubles.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 6).
 * Otherwise each line of DOUBLES_PATH (an integer id, a double value, and a term id) is parsed,
 * the double term is found or created through the term finder facade, and its new term id is
 * recorded in the term id cross reference at the index given by the MySQL term id.
 *
 * @throws TexaiException if the file cannot be found or read
 */
private void convertDoubles() {
    if (CONVERSION_STARTING_STEP > 6) {
        return;
    }
    LOGGER.info("converting doubles");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(DOUBLES_PATH));
        // close the reader even when a line fails to parse or the facade throws
        try {
            while (true) {
                final String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] pDoubleInfoIdResults = parseInteger(line);
                String remainingLine = (String) pDoubleInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] doubleValueResults = parseDouble(remainingLine);
                final Double doubleValue = (Double) doubleValueResults[0];
                remainingLine = (String) doubleValueResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                assert termTypeArray[termId] == Constants.DOUBLE : "invalid double at termId: " + termId + ", line: " + line;
                final PDouble pDouble = termFinderFacade.findOrCreatePDoubleByDoubleValue(doubleValue);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + pDouble);
                }
                newTermIdArray[termId] = pDouble.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the strings.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 7).
 * Unlike the other steps, the strings are read directly from the MySQL PStringInfo table rather
 * than from a dump file. Each string term is found or created through the term finder facade and
 * its new term id is recorded in the term id cross reference at the index given by the MySQL
 * term id.
 *
 * @throws TexaiException if the JDBC driver cannot be loaded or a database error occurs
 */
private void convertStrings() {
    if (CONVERSION_STARTING_STEP > 7) {
        return;
    }
    LOGGER.info("converting strings");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        // NOTE(review): the database credentials are hard-coded in the JDBC URL; consider externalizing them
        connection = DriverManager.getConnection("jdbc:mysql://localhost/kb_test?user=mysql&password=lesterlester");
        // release the connection, statement and result set even when a row fails to convert
        try {
            final Statement queryStatement = connection.createStatement();
            try {
                final ResultSet resultSet = queryStatement.executeQuery("SELECT stringValue, pString_termId FROM PStringInfo");
                try {
                    while (resultSet.next()) {
                        final String stringValue = resultSet.getString("stringValue");
                        final Integer termId = (int) resultSet.getLong("pString_termId");
                        assert termTypeArray[termId] == Constants.STRING : "invalid string at termId: " + termId;
                        final PString pString = termFinderFacade.findOrCreatePStringByStringValue(stringValue);
                        if (nbrItemsProcessed < 10) {
                            LOGGER.info(" " + pString);
                        }
                        newTermIdArray[termId] = pString.getTermId();
                        nbrItemsProcessed++;
                    }
                } finally {
                    resultSet.close();
                }
            } finally {
                queryStatement.close();
            }
        } finally {
            connection.close();
        }
    } catch (final ClassNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final InstantiationException ex) {
        throw new TexaiException(ex);
    } catch (final IllegalAccessException ex) {
        throw new TexaiException(ex);
    } catch (final SQLException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the atomic terms.
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 8).
 * Otherwise each record of ATOMIC_TERMS_PATH is parsed in this field order: an integer id, the
 * term name, the pretty name, the UUID string, the term id, the creator term id, the creation
 * purpose term id and the creation time point term id. A record whose line ends with a backslash
 * continues on the following line. The creator and creation purpose are resolved to already
 * converted atomic terms where possible (and cached); if only one of the two can be resolved,
 * both are dropped. The atomic term is then created through the term finder facade and its new
 * term id is recorded in the term id cross reference.
 *
 * @throws TexaiException if the file cannot be found or read, a record cannot be parsed, or a
 * referenced creation time point is not in the time point dictionary
 */
private void convertAtomicTerms() {
    if (CONVERSION_STARTING_STEP > 8) {
        return;
    }
    LOGGER.info("converting atomic terms");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(ATOMIC_TERMS_PATH));
        // close the reader even when a record fails to parse or the facade throws
        try {
            while (true) {
                String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                // a trailing backslash means the record continues on the next physical line
                while (line.endsWith("\\")) {
                    final String continuation = bufferedReader.readLine();
                    if (continuation == null) {
                        // end of file reached: do not append the literal text "null"
                        break;
                    }
                    line = line + continuation;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] atomicTermInfoIdResults = parseInteger(line);
                String remainingLine = (String) atomicTermInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termNameResults = parseString(remainingLine);
                final String termName = (String) termNameResults[0];
                remainingLine = (String) termNameResults[1];
                remainingLine = parseComma(remainingLine);
                String prettyName = null;
                try {
                    final Object[] prettyNameResults = parseString(remainingLine);
                    prettyName = (String) prettyNameResults[0];
                    remainingLine = (String) prettyNameResults[1];
                    remainingLine = parseComma(remainingLine);
                } catch (final TexaiException ex) {
                    // add the offending record to the error message
                    throw new TexaiException(ex + "\n" + line);
                }
                final Object[] uuidResults = parseString(remainingLine);
                final String uuidString = (String) uuidResults[0];
                remainingLine = (String) uuidResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                remainingLine = (String) termIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creatorTermIdResults = parseInteger(remainingLine);
                final Integer creatorTermId = (Integer) creatorTermIdResults[0];
                remainingLine = (String) creatorTermIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
                final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
                remainingLine = (String) creationPurposeTermIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
                final Integer creationTimePointTermId = (Integer) creationTimePointTermIdResults[0];
                // resolve the creator: first from the cache, then from the already converted atomic terms
                AbstractReifiedTerm creator = null;
                if (creatorTermId != null) {
                    creator = reifiedTermCache.get(creatorTermId);
                    if (creator == null) {
                        final byte creatorTermType = termTypeArray[creatorTermId];
                        if (creatorTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creatorTermId];
                            // zero means the creator has not been converted yet
                            if (newTermId != 0) {
                                try {
                                    creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                                    LOGGER.info(" found " + creator + " " + creatorTermId);
                                    reifiedTermCache.put(creatorTermId, creator);
                                } catch (final TexaiException ex) {
                                    // best effort: the term is created without a creator
                                    LOGGER.debug("creator not found for " + creatorTermId + " in " + line);
                                }
                            }
                        } else {
                            LOGGER.info(" not an atomic creator " + line);
                        }
                    }
                }
                // resolve the creation purpose the same way
                AbstractReifiedTerm creationPurpose = null;
                if (creationPurposeTermId != null) {
                    creationPurpose = reifiedTermCache.get(creationPurposeTermId);
                    if (creationPurpose == null) {
                        final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
                        if (creationPurposeTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creationPurposeTermId];
                            if (newTermId != 0) {
                                try {
                                    creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
                                    LOGGER.info(" found " + creationPurpose + " " + creationPurposeTermId);
                                    reifiedTermCache.put(creationPurposeTermId, creationPurpose);
                                } catch (final TexaiException ex) {
                                    // best effort: the term is created without a creation purpose
                                    LOGGER.debug("creationPurpose not found for " + creationPurposeTermId + " in " + line);
                                }
                            }
                        } else {
                            LOGGER.info(" not an atomic creation purpose " + line);
                        }
                    }
                }
                // the creator and creation purpose are only used as a pair
                if (creator != null && creationPurpose == null) {
                    creator = null;
                }
                if (creator == null && creationPurpose != null) {
                    creationPurpose = null;
                }
                Date creationDate = new Date();
                if (creationTimePointTermId != null) {
                    final Long creationTimeMillis = timePointDictionary.get(creationTimePointTermId);
                    if (creationTimeMillis == null) {
                        // report the missing time point with context instead of failing on unboxing
                        throw new TexaiException("time point not found for " + creationTimePointTermId + " in " + line);
                    }
                    creationDate = new Date(creationTimeMillis);
                }
                assert termTypeArray[termId] == Constants.ATOMIC : "invalid atomic term at termId: " + termId + ", line: " + line;
                final AtomicTerm atomicTerm = termFinderFacade.createAtomicTermByTermName(
                        termName,
                        prettyName,
                        uuidString,
                        creator,
                        creationPurpose,
                        creationDate);
                if ((nbrItemsProcessed < 10) || (nbrItemsProcessed % 100000 == 0) || termId == 30801921 || termId == 30801926) {
                    LOGGER.info(" " + atomicTerm);
                }
                assert atomicTerm.equals(termFinderFacade.findAtomicTermByTermId(atomicTerm.getTermId()));
                newTermIdArray[termId] = atomicTerm.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the formulas (phase 1).
 *
 * Skipped when the conversion is resumed from a later step (CONVERSION_STARTING_STEP > 9).
 * Otherwise each record of FORMULAS_PATH is parsed in this field order: an integer id, the
 * formula string, the term id, the creation time point term id, the creator term id and the
 * creation purpose term id. A record whose line ends with a backslash continues on the following
 * line. The creator and creation purpose are resolved to already converted atomic terms where
 * possible (and cached); if only one of the two can be resolved, both are dropped. The formula is
 * then found or created through the term finder facade and its new term id is recorded in the
 * term id cross reference.
 *
 * @throws TexaiException if the file cannot be found or read, a converted creator cannot be
 * looked up, or a referenced creation time point is not in the time point dictionary
 */
private void convertFormulasPhase1() {
    if (CONVERSION_STARTING_STEP > 9) {
        return;
    }
    LOGGER.info("converting formulas phase 1");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(FORMULAS_PATH));
        // close the reader even when a record fails to parse or the facade throws
        try {
            while (true) {
                String line = bufferedReader.readLine();
                if (line == null) {
                    break;
                }
                // a trailing backslash means the record continues on the next physical line
                while (line.endsWith("\\")) {
                    final String continuation = bufferedReader.readLine();
                    if (continuation == null) {
                        // end of file reached: do not append the literal text "null"
                        break;
                    }
                    line = line + continuation;
                }
                LOGGER.debug(line);
                // the leading integer id is parsed only to advance past it
                final Object[] formulaInfoIdResults = parseInteger(line);
                String remainingLine = (String) formulaInfoIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] formulaStringResults = parseString(remainingLine);
                final String formulaString = (String) formulaStringResults[0];
                remainingLine = (String) formulaStringResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] termIdResults = parseInteger(remainingLine);
                final Integer termId = (Integer) termIdResults[0];
                remainingLine = (String) termIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
                final Integer creationTimePointTermId = (Integer) creationTimePointTermIdResults[0];
                remainingLine = (String) creationTimePointTermIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creatorTermIdResults = parseInteger(remainingLine);
                final Integer creatorTermId = (Integer) creatorTermIdResults[0];
                remainingLine = (String) creatorTermIdResults[1];
                remainingLine = parseComma(remainingLine);
                final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
                final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
                // resolve the creator: first from the cache, then from the already converted atomic terms
                AbstractReifiedTerm creator = null;
                if (creatorTermId != null) {
                    creator = reifiedTermCache.get(creatorTermId);
                    if (creator == null) {
                        final byte creatorTermType = termTypeArray[creatorTermId];
                        if (creatorTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creatorTermId];
                            // zero means the creator has not been converted
                            if (newTermId != 0) {
                                try {
                                    creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                                    LOGGER.info(" found " + creator);
                                    reifiedTermCache.put(creatorTermId, creator);
                                } catch (final TexaiException ex) {
                                    // a converted creator that cannot be looked up is treated as fatal here
                                    throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
                                }
                            }
                        } else {
                            LOGGER.info(" not an atomic creator " + line);
                        }
                    }
                }
                // resolve the creation purpose the same way; a lookup failure propagates to the caller
                AbstractReifiedTerm creationPurpose = null;
                if (creationPurposeTermId != null) {
                    creationPurpose = reifiedTermCache.get(creationPurposeTermId);
                    if (creationPurpose == null) {
                        final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
                        if (creationPurposeTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creationPurposeTermId];
                            if (newTermId != 0) {
                                creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
                                LOGGER.info(" found " + creationPurpose);
                                reifiedTermCache.put(creationPurposeTermId, creationPurpose);
                            }
                        } else {
                            LOGGER.info(" not an atomic creation purpose " + line);
                        }
                    }
                }
                // the creator and creation purpose are only used as a pair
                if (creator != null && creationPurpose == null) {
                    creator = null;
                }
                if (creator == null && creationPurpose != null) {
                    creationPurpose = null;
                }
                Date creationDate = new Date();
                if (creationTimePointTermId != null) {
                    final Long creationTimeMillis = timePointDictionary.get(creationTimePointTermId);
                    if (creationTimeMillis == null) {
                        // report the missing time point with context instead of failing on unboxing
                        throw new TexaiException("time point not found for " + creationTimePointTermId + " in " + line);
                    }
                    creationDate = new Date(creationTimeMillis);
                }
                assert termTypeArray[termId] == Constants.FORMULA : "invalid formula at termId: " + termId + ", line: " + line;
                final Formula formula = termFinderFacade.findOrCreateFormulaByFormulaString(
                        formulaString,
                        creator,
                        creationPurpose,
                        creationDate);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + formula);
                }
                newTermIdArray[termId] = formula.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero elapsed time, which would otherwise cause a division by zero
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the non-atomic terms.
 *
 * <p>Reads the file at NON_ATOMIC_TERMS_PATH, parses each record's comma-separated columns
 * (info id, term name, pretty name, UUID string, term id, formula term id, creation time point
 * term id, creation purpose term id, creator term id), finds or creates the non-atomic term via
 * the term finder facade, and stores the resulting term id in newTermIdArray at the index of the
 * parsed term id.  Returns immediately when CONVERSION_STARTING_STEP is greater than 10.
 *
 * @throws TexaiException if the file cannot be found or read, or if the creator lookup fails
 */
private void convertNonAtomicTerms() {
  if (CONVERSION_STARTING_STEP > 10) {
    return;
  }
  LOGGER.info("converting non-atomic terms");
  nbrItemsProcessed = 0;
  final long beginTimeMillis = System.currentTimeMillis();
  BufferedReader bufferedReader = null;
  try {
    bufferedReader = new BufferedReader(new FileReader(NON_ATOMIC_TERMS_PATH));
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      // a line ending in a backslash is joined with the following physical line(s)
      while (line.endsWith("\\")) {
        final String continuation = bufferedReader.readLine();
        if (continuation == null) {
          // end of file inside a continued record: stop rather than append the text "null"
          break;
        }
        line = line + continuation;
      }
      LOGGER.debug(line);

      // the leading info id column is parsed only to advance past it
      final Object[] nonAtomicTermInfoIdResults = parseInteger(line);
      String remainingLine = (String) nonAtomicTermInfoIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termNameResults = parseString(remainingLine);
      final String termName = (String) termNameResults[0];
      remainingLine = (String) termNameResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] prettyNameResults = parseString(remainingLine);
      final String prettyName = (String) prettyNameResults[0];
      remainingLine = (String) prettyNameResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] uuidStringResults = parseString(remainingLine);
      final String uuidString = (String) uuidStringResults[0];
      remainingLine = (String) uuidStringResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termIdResults = parseInteger(remainingLine);
      final Integer termId = (Integer) termIdResults[0];
      remainingLine = (String) termIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] formulaTermIdResults = parseInteger(remainingLine);
      final Integer formulaTermId = (Integer) formulaTermIdResults[0];
      remainingLine = (String) formulaTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      // the creation time point column is parsed only to advance past it; its value is not used here
      final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
      remainingLine = (String) creationTimePointTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
      final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
      remainingLine = (String) creationPurposeTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creatorTermIdResults = parseInteger(remainingLine);
      final Integer creatorTermId = (Integer) creatorTermIdResults[0];

      // resolve the creator: cache first, otherwise the facade when the term is atomic and has a
      // non-zero entry in newTermIdArray
      AbstractReifiedTerm creator = null;
      if (creatorTermId != null) {
        creator = reifiedTermCache.get(creatorTermId);
        if (creator == null) {
          final byte creatorTermType = termTypeArray[creatorTermId];
          if (creatorTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creatorTermId];
            if (newTermId != 0) {
              try {
                creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                LOGGER.info(" found " + creator);
                reifiedTermCache.put(creatorTermId, creator);
              } catch (final TexaiException ex) {
                throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
              }
            }
          } else {
            LOGGER.info(" not an atomic creator " + line);
          }
        }
      }

      // resolve the creation purpose the same way (a facade failure propagates unchanged)
      AbstractReifiedTerm creationPurpose = null;
      if (creationPurposeTermId != null) {
        creationPurpose = reifiedTermCache.get(creationPurposeTermId);
        if (creationPurpose == null) {
          final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
          if (creationPurposeTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creationPurposeTermId];
            if (newTermId != 0) {
              creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
              LOGGER.info(" found " + creationPurpose);
              reifiedTermCache.put(creationPurposeTermId, creationPurpose);
            }
          } else {
            LOGGER.info(" not an atomic creation purpose " + line);
          }
        }
      }

      // creator and creation purpose are kept only as a pair; if either is missing, drop both
      if (creator != null && creationPurpose == null) {
        creator = null;
      }
      if (creator == null && creationPurpose != null) {
        creationPurpose = null;
      }

      assert termTypeArray[termId] == Constants.NON_ATOMIC : "invalid non-atomic term at termId: " + termId + ", line: " + line;
      final Formula formula = termFinderFacade.findFormulaByTermId(newTermIdArray[formulaTermId]);
      final NonAtomicTerm nonAtomicTerm = termFinderFacade.findOrCreateNonAtomicTermByTermName(
          termName,
          prettyName,
          uuidString,
          formula,
          creator,
          creationPurpose);
      // log only the first few results as a sample
      if (nbrItemsProcessed < 10) {
        LOGGER.info(" " + nonAtomicTerm);
      }
      newTermIdArray[termId] = nonAtomicTerm.getTermId();
      nbrItemsProcessed++;
    }
  } catch (final FileNotFoundException ex) {
    throw new TexaiException(ex);
  } catch (final IOException ex) {
    throw new TexaiException(ex);
  } finally {
    // always release the file handle, including when a record fails to convert
    if (bufferedReader != null) {
      try {
        bufferedReader.close();
      } catch (final IOException ex) {
        // logged rather than thrown so that it cannot mask an exception already propagating
        LOGGER.warn("unable to close " + NON_ATOMIC_TERMS_PATH, ex);
      }
    }
  }
  // clamp the elapsed time to at least one millisecond to avoid dividing by zero
  final long elapsedMillis = Math.max(1L, System.currentTimeMillis() - beginTimeMillis);
  final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
  LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the rules.
 *
 * <p>Reads the file at RULES_PATH line by line, parses each line's comma-separated columns
 * (info id, strength, term id, formula term id, context term id, creation time point term id,
 * creation purpose term id, creator term id), finds or creates the rule via the term finder
 * facade, and stores the resulting term id in newTermIdArray at the index of the parsed term id.
 * Returns immediately when CONVERSION_STARTING_STEP is greater than 11.
 *
 * @throws TexaiException if the file cannot be found or read, or if the creator lookup fails
 */
private void convertRules() {
  if (CONVERSION_STARTING_STEP > 11) {
    return;
  }
  LOGGER.info("converting rules");
  nbrItemsProcessed = 0;
  final long beginTimeMillis = System.currentTimeMillis();
  BufferedReader bufferedReader = null;
  try {
    bufferedReader = new BufferedReader(new FileReader(RULES_PATH));
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      LOGGER.debug(line);

      // the leading info id column is parsed only to advance past it
      final Object[] ruleInfoIdResults = parseInteger(line);
      String remainingLine = (String) ruleInfoIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] strengthResults = parseDouble(remainingLine);
      final Double strength = (Double) strengthResults[0];
      remainingLine = (String) strengthResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termIdResults = parseInteger(remainingLine);
      final Integer termId = (Integer) termIdResults[0];
      remainingLine = (String) termIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] formulaTermIdResults = parseInteger(remainingLine);
      final Integer formulaTermId = (Integer) formulaTermIdResults[0];
      remainingLine = (String) formulaTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] contextTermIdResults = parseInteger(remainingLine);
      final Integer contextTermId = (Integer) contextTermIdResults[0];
      remainingLine = (String) contextTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      // the creation time point column is parsed only to advance past it; its value is not used here
      final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
      remainingLine = (String) creationTimePointTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
      final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
      remainingLine = (String) creationPurposeTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creatorTermIdResults = parseInteger(remainingLine);
      final Integer creatorTermId = (Integer) creatorTermIdResults[0];

      // resolve the creator: cache first, otherwise the facade when the term is atomic and has a
      // non-zero entry in newTermIdArray
      AbstractReifiedTerm creator = null;
      if (creatorTermId != null) {
        creator = reifiedTermCache.get(creatorTermId);
        if (creator == null) {
          final byte creatorTermType = termTypeArray[creatorTermId];
          if (creatorTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creatorTermId];
            if (newTermId != 0) {
              try {
                creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                LOGGER.info(" found " + creator);
                reifiedTermCache.put(creatorTermId, creator);
              } catch (final TexaiException ex) {
                throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
              }
            }
          } else {
            LOGGER.info(" not an atomic creator " + line);
          }
        }
      }

      // resolve the creation purpose the same way (a facade failure propagates unchanged)
      AbstractReifiedTerm creationPurpose = null;
      if (creationPurposeTermId != null) {
        creationPurpose = reifiedTermCache.get(creationPurposeTermId);
        if (creationPurpose == null) {
          final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
          if (creationPurposeTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creationPurposeTermId];
            if (newTermId != 0) {
              creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
              LOGGER.info(" found " + creationPurpose);
              reifiedTermCache.put(creationPurposeTermId, creationPurpose);
            }
          } else {
            LOGGER.info(" not an atomic creation purpose " + line);
          }
        }
      }

      // creator and creation purpose are kept only as a pair; if either is missing, drop both
      if (creator != null && creationPurpose == null) {
        creator = null;
      }
      if (creator == null && creationPurpose != null) {
        creationPurpose = null;
      }

      assert termTypeArray[termId] == Constants.RULE : "invalid rule at termId: " + termId + ", line: " + line;
      final Formula formula = termFinderFacade.findFormulaByTermId(newTermIdArray[formulaTermId]);
      assert formula != null : "formula not found for MySQL formula term id " + formulaTermId;
      final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[contextTermId],
          newTermIdArray[contextTermId]);
      assert context != null : "context not found for MySQL context term id " + contextTermId;
      final Rule rule = termFinderFacade.findOrCreateRuleByFormula(
          formula,
          context,
          strength,
          creator,
          creationPurpose);
      // log only the first few results as a sample
      if (nbrItemsProcessed < 10) {
        LOGGER.info(" " + rule);
      }
      newTermIdArray[termId] = rule.getTermId();
      nbrItemsProcessed++;
    }
  } catch (final FileNotFoundException ex) {
    throw new TexaiException(ex);
  } catch (final IOException ex) {
    throw new TexaiException(ex);
  } finally {
    // always release the file handle, including when a record fails to convert
    if (bufferedReader != null) {
      try {
        bufferedReader.close();
      } catch (final IOException ex) {
        // logged rather than thrown so that it cannot mask an exception already propagating
        LOGGER.warn("unable to close " + RULES_PATH, ex);
      }
    }
  }
  // clamp the elapsed time to at least one millisecond to avoid dividing by zero
  final long elapsedMillis = Math.max(1L, System.currentTimeMillis() - beginTimeMillis);
  final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
  LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the unary GAFs.
 *
 * <p>Reads the file at UNARY_GAFS_PATH line by line, parses each line's comma-separated columns
 * (strength, generated phrase, context term id, creation purpose term id, creator term id,
 * creation time point term id, term id, predicate term id, arg1 term id), finds or creates the
 * unary GAF via the term finder facade, and stores the resulting term id in newTermIdArray at the
 * index of the parsed term id.  Returns immediately when CONVERSION_STARTING_STEP is greater
 * than 12.
 *
 * @throws TexaiException if the file cannot be found or read, or if the creator lookup fails
 */
private void convertUnaryGAFs() {
  if (CONVERSION_STARTING_STEP > 12) {
    return;
  }
  LOGGER.info("converting unary GAFs");
  nbrItemsProcessed = 0;
  final long beginTimeMillis = System.currentTimeMillis();
  BufferedReader bufferedReader = null;
  try {
    bufferedReader = new BufferedReader(new FileReader(UNARY_GAFS_PATH));
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      LOGGER.debug(line);

      final Object[] strengthResults = parseDouble(line);
      final Double strength = (Double) strengthResults[0];
      String remainingLine = (String) strengthResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] generatedPhraseResults = parseString(remainingLine);
      final String generatedPhrase = (String) generatedPhraseResults[0];
      remainingLine = (String) generatedPhraseResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] contextTermIdResults = parseInteger(remainingLine);
      final Integer contextTermId = (Integer) contextTermIdResults[0];
      remainingLine = (String) contextTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
      final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
      remainingLine = (String) creationPurposeTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creatorTermIdResults = parseInteger(remainingLine);
      final Integer creatorTermId = (Integer) creatorTermIdResults[0];
      remainingLine = (String) creatorTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
      final Integer creationTimePointTermId = (Integer) creationTimePointTermIdResults[0];
      remainingLine = (String) creationTimePointTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termIdResults = parseInteger(remainingLine);
      final Integer termId = (Integer) termIdResults[0];
      remainingLine = (String) termIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] predicateTermIdResults = parseInteger(remainingLine);
      final Integer predicateTermId = (Integer) predicateTermIdResults[0];
      remainingLine = (String) predicateTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg1TermIdResults = parseInteger(remainingLine);
      final Integer arg1TermId = (Integer) arg1TermIdResults[0];

      // resolve the creator: cache first, otherwise the facade when the term is atomic and has a
      // non-zero entry in newTermIdArray
      AbstractReifiedTerm creator = null;
      if (creatorTermId != null) {
        creator = reifiedTermCache.get(creatorTermId);
        if (creator == null) {
          final byte creatorTermType = termTypeArray[creatorTermId];
          if (creatorTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creatorTermId];
            if (newTermId != 0) {
              try {
                creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                LOGGER.info(" found " + creator);
                reifiedTermCache.put(creatorTermId, creator);
              } catch (final TexaiException ex) {
                throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
              }
            }
          } else {
            LOGGER.info(" not an atomic creator " + line);
          }
        }
      }

      // resolve the creation purpose the same way (a facade failure propagates unchanged)
      AbstractReifiedTerm creationPurpose = null;
      if (creationPurposeTermId != null) {
        creationPurpose = reifiedTermCache.get(creationPurposeTermId);
        if (creationPurpose == null) {
          final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
          if (creationPurposeTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creationPurposeTermId];
            if (newTermId != 0) {
              creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
              LOGGER.info(" found " + creationPurpose);
              reifiedTermCache.put(creationPurposeTermId, creationPurpose);
            }
          } else {
            LOGGER.info(" not an atomic creation purpose " + line);
          }
        }
      }

      // creator and creation purpose are kept only as a pair; if either is missing, drop both
      if (creator != null && creationPurpose == null) {
        creator = null;
      }
      if (creator == null && creationPurpose != null) {
        creationPurpose = null;
      }

      assert termTypeArray[termId] == Constants.UNARY_GAF : "invalid unary GAF at termId: " + termId + ", line: " + line;
      final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[contextTermId],
          newTermIdArray[contextTermId]);
      assert context != null : "context not found for MySQL context term id " + contextTermId;

      // default to the current time when the record carries no creation time point
      Date creationDate = new Date();
      if (creationTimePointTermId != null) {
        creationDate = new Date(timePointDictionary.get(creationTimePointTermId));
      }

      final AbstractReifiedTerm predicate = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[predicateTermId],
          newTermIdArray[predicateTermId]);
      assert predicate != null : "predicate not found for MySQL predicate term id " + predicateTermId;
      final AbstractTerm arg1 = termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[arg1TermId],
          newTermIdArray[arg1TermId]);
      assert arg1 != null : "arg1 not found for MySQL arg1 term id " + arg1TermId;
      final List<AbstractTerm> args = new ArrayList<AbstractTerm>(Constants.UNARY_ARGS_SIZE);
      args.add(arg1);
      final UnaryGAF unaryGAF = termFinderFacade.findOrCreateUnaryGAFByConstituents(
          predicate,
          args,
          context,
          strength,
          generatedPhrase,
          creator,
          creationPurpose,
          creationDate);
      // log only the first few results as a sample
      if (nbrItemsProcessed < 10) {
        LOGGER.info(" " + unaryGAF);
      }
      newTermIdArray[termId] = unaryGAF.getTermId();
      nbrItemsProcessed++;
    }
  } catch (final FileNotFoundException ex) {
    throw new TexaiException(ex);
  } catch (final IOException ex) {
    throw new TexaiException(ex);
  } finally {
    // always release the file handle, including when a record fails to convert
    if (bufferedReader != null) {
      try {
        bufferedReader.close();
      } catch (final IOException ex) {
        // logged rather than thrown so that it cannot mask an exception already propagating
        LOGGER.warn("unable to close " + UNARY_GAFS_PATH, ex);
      }
    }
  }
  // clamp the elapsed time to at least one millisecond to avoid dividing by zero
  final long elapsedMillis = Math.max(1L, System.currentTimeMillis() - beginTimeMillis);
  final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
  LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the ternary GAFs.
 *
 * <p>Reads the file at TERNARY_GAFS_PATH line by line, parses each line's comma-separated columns
 * (strength, generated phrase, context term id, creation purpose term id, creator term id,
 * creation time point term id, term id, predicate term id, arg1..arg3 term ids), finds or creates
 * the ternary GAF via the term finder facade, and stores the resulting term id in newTermIdArray
 * at the index of the parsed term id.  A record is logged and skipped when looking up any of its
 * three arguments throws.  Returns immediately when CONVERSION_STARTING_STEP is greater than 13.
 *
 * @throws TexaiException if the file cannot be found or read, or if the creator lookup fails
 */
private void convertTernaryGAFs() {
  if (CONVERSION_STARTING_STEP > 13) {
    return;
  }
  LOGGER.info("converting ternary GAFs");
  nbrItemsProcessed = 0;
  final long beginTimeMillis = System.currentTimeMillis();
  BufferedReader bufferedReader = null;
  try {
    bufferedReader = new BufferedReader(new FileReader(TERNARY_GAFS_PATH));
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      LOGGER.debug(line);

      final Object[] strengthResults = parseDouble(line);
      final Double strength = (Double) strengthResults[0];
      String remainingLine = (String) strengthResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] generatedPhraseResults = parseString(remainingLine);
      final String generatedPhrase = (String) generatedPhraseResults[0];
      remainingLine = (String) generatedPhraseResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] contextTermIdResults = parseInteger(remainingLine);
      final Integer contextTermId = (Integer) contextTermIdResults[0];
      remainingLine = (String) contextTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
      final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
      remainingLine = (String) creationPurposeTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creatorTermIdResults = parseInteger(remainingLine);
      final Integer creatorTermId = (Integer) creatorTermIdResults[0];
      remainingLine = (String) creatorTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
      final Integer creationTimePointTermId = (Integer) creationTimePointTermIdResults[0];
      remainingLine = (String) creationTimePointTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termIdResults = parseInteger(remainingLine);
      final Integer termId = (Integer) termIdResults[0];
      remainingLine = (String) termIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] predicateTermIdResults = parseInteger(remainingLine);
      final Integer predicateTermId = (Integer) predicateTermIdResults[0];
      remainingLine = (String) predicateTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg1TermIdResults = parseInteger(remainingLine);
      final Integer arg1TermId = (Integer) arg1TermIdResults[0];
      remainingLine = (String) arg1TermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg2TermIdResults = parseInteger(remainingLine);
      final Integer arg2TermId = (Integer) arg2TermIdResults[0];
      remainingLine = (String) arg2TermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg3TermIdResults = parseInteger(remainingLine);
      final Integer arg3TermId = (Integer) arg3TermIdResults[0];

      // resolve the creator: cache first, otherwise the facade when the term is atomic and has a
      // non-zero entry in newTermIdArray
      AbstractReifiedTerm creator = null;
      if (creatorTermId != null) {
        creator = reifiedTermCache.get(creatorTermId);
        if (creator == null) {
          final byte creatorTermType = termTypeArray[creatorTermId];
          if (creatorTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creatorTermId];
            if (newTermId != 0) {
              try {
                creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                LOGGER.info(" found " + creator);
                reifiedTermCache.put(creatorTermId, creator);
              } catch (final TexaiException ex) {
                throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
              }
            }
          } else {
            LOGGER.info(" not an atomic creator " + line);
          }
        }
      }

      // resolve the creation purpose the same way (a facade failure propagates unchanged)
      AbstractReifiedTerm creationPurpose = null;
      if (creationPurposeTermId != null) {
        creationPurpose = reifiedTermCache.get(creationPurposeTermId);
        if (creationPurpose == null) {
          final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
          if (creationPurposeTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creationPurposeTermId];
            if (newTermId != 0) {
              creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
              LOGGER.info(" found " + creationPurpose);
              reifiedTermCache.put(creationPurposeTermId, creationPurpose);
            }
          } else {
            LOGGER.info(" not an atomic creation purpose " + line);
          }
        }
      }

      // creator and creation purpose are kept only as a pair; if either is missing, drop both
      if (creator != null && creationPurpose == null) {
        creator = null;
      }
      if (creator == null && creationPurpose != null) {
        creationPurpose = null;
      }

      assert termTypeArray[termId] == Constants.TERNARY_GAF : "invalid ternary GAF at termId: " + termId + ", line: " + line;
      final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[contextTermId],
          newTermIdArray[contextTermId]);
      assert context != null : "context not found for MySQL context term id " + contextTermId;

      // default to the current time when the record carries no creation time point
      Date creationDate = new Date();
      if (creationTimePointTermId != null) {
        creationDate = new Date(timePointDictionary.get(creationTimePointTermId));
      }

      final AbstractReifiedTerm predicate = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[predicateTermId],
          newTermIdArray[predicateTermId]);
      assert predicate != null : "predicate not found for MySQL predicate term id " + predicateTermId;

      // For each argument, a TexaiException or AssertionError from the lookup is treated as
      // "argument not found": the record is logged and skipped.
      AbstractTerm arg1 = null;
      try {
        arg1 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg1TermId],
            newTermIdArray[arg1TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg1 not found for MySQL term id " + arg1TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg1 not found for MySQL term id " + arg1TermId);
        continue;
      }
      assert arg1 != null : "arg1 must not be null";
      assert arg1.getTermType() == termTypeArray[arg1TermId]: "arg1.getTermType() "
          + arg1.getTermType() + ", termTypeArray[arg1TermId]) " + termTypeArray[arg1TermId];

      AbstractTerm arg2 = null;
      try {
        arg2 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg2TermId],
            newTermIdArray[arg2TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg2 not found for MySQL term id " + arg2TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg2 not found for MySQL term id " + arg2TermId);
        continue;
      }
      assert arg2 != null : "arg2 must not be null";
      assert arg2.getTermType() == termTypeArray[arg2TermId]: "arg2.getTermType() "
          + arg2.getTermType() + ", termTypeArray[arg2TermId]) " + termTypeArray[arg2TermId];

      AbstractTerm arg3 = null;
      try {
        arg3 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg3TermId],
            newTermIdArray[arg3TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg3 not found for MySQL term id " + arg3TermId);
        // skip the record like arg1 and arg2 do; without this a null arg3 would fall through
        // and be added to the argument list
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg3 not found for MySQL term id " + arg3TermId);
        continue;
      }
      assert arg3 != null : "arg3 must not be null";
      assert arg3.getTermType() == termTypeArray[arg3TermId]: "arg3.getTermType() "
          + arg3.getTermType() + ", termTypeArray[arg3TermId]) " + termTypeArray[arg3TermId];

      final List<AbstractTerm> args = new ArrayList<AbstractTerm>(Constants.TERNARY_ARGS_SIZE);
      args.add(arg1);
      args.add(arg2);
      args.add(arg3);
      final TernaryGAF ternaryGAF = termFinderFacade.findOrCreateTernaryGAFByConstituents(
          predicate,
          args,
          context,
          strength,
          generatedPhrase,
          creator,
          creationPurpose,
          creationDate);
      // log only the first few results as a sample
      if (nbrItemsProcessed < 10) {
        LOGGER.info(" " + ternaryGAF);
      }
      newTermIdArray[termId] = ternaryGAF.getTermId();
      nbrItemsProcessed++;
    }
  } catch (final FileNotFoundException ex) {
    throw new TexaiException(ex);
  } catch (final IOException ex) {
    throw new TexaiException(ex);
  } finally {
    // always release the file handle, including when a record fails to convert
    if (bufferedReader != null) {
      try {
        bufferedReader.close();
      } catch (final IOException ex) {
        // logged rather than thrown so that it cannot mask an exception already propagating
        LOGGER.warn("unable to close " + TERNARY_GAFS_PATH, ex);
      }
    }
  }
  // clamp the elapsed time to at least one millisecond to avoid dividing by zero
  final long elapsedMillis = Math.max(1L, System.currentTimeMillis() - beginTimeMillis);
  final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
  LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/**
 * Converts the quaternary GAFs.
 *
 * <p>Reads the file at QUATERNARY_GAFS_PATH line by line, parses each line's comma-separated
 * columns (strength, generated phrase, context term id, creation purpose term id, creator term
 * id, creation time point term id, term id, predicate term id, arg1..arg4 term ids), finds or
 * creates the quaternary GAF via the term finder facade, and stores the resulting term id in
 * newTermIdArray at the index of the parsed term id.  A record is logged and skipped when looking
 * up any of its four arguments throws.  Returns immediately when CONVERSION_STARTING_STEP is
 * greater than 14.
 *
 * @throws TexaiException if the file cannot be found or read, or if the creator lookup fails
 */
private void convertQuaternaryGAFs() {
  if (CONVERSION_STARTING_STEP > 14) {
    return;
  }
  LOGGER.info("converting quaternary GAFs");
  nbrItemsProcessed = 0;
  final long beginTimeMillis = System.currentTimeMillis();
  BufferedReader bufferedReader = null;
  try {
    bufferedReader = new BufferedReader(new FileReader(QUATERNARY_GAFS_PATH));
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      LOGGER.debug(line);

      final Object[] strengthResults = parseDouble(line);
      final Double strength = (Double) strengthResults[0];
      String remainingLine = (String) strengthResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] generatedPhraseResults = parseString(remainingLine);
      final String generatedPhrase = (String) generatedPhraseResults[0];
      remainingLine = (String) generatedPhraseResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] contextTermIdResults = parseInteger(remainingLine);
      final Integer contextTermId = (Integer) contextTermIdResults[0];
      remainingLine = (String) contextTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationPurposeTermIdResults = parseInteger(remainingLine);
      final Integer creationPurposeTermId = (Integer) creationPurposeTermIdResults[0];
      remainingLine = (String) creationPurposeTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creatorTermIdResults = parseInteger(remainingLine);
      final Integer creatorTermId = (Integer) creatorTermIdResults[0];
      remainingLine = (String) creatorTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] creationTimePointTermIdResults = parseInteger(remainingLine);
      final Integer creationTimePointTermId = (Integer) creationTimePointTermIdResults[0];
      remainingLine = (String) creationTimePointTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] termIdResults = parseInteger(remainingLine);
      final Integer termId = (Integer) termIdResults[0];
      remainingLine = (String) termIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] predicateTermIdResults = parseInteger(remainingLine);
      final Integer predicateTermId = (Integer) predicateTermIdResults[0];
      remainingLine = (String) predicateTermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg1TermIdResults = parseInteger(remainingLine);
      final Integer arg1TermId = (Integer) arg1TermIdResults[0];
      remainingLine = (String) arg1TermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg2TermIdResults = parseInteger(remainingLine);
      final Integer arg2TermId = (Integer) arg2TermIdResults[0];
      remainingLine = (String) arg2TermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg3TermIdResults = parseInteger(remainingLine);
      final Integer arg3TermId = (Integer) arg3TermIdResults[0];
      remainingLine = (String) arg3TermIdResults[1];
      remainingLine = parseComma(remainingLine);
      final Object[] arg4TermIdResults = parseInteger(remainingLine);
      final Integer arg4TermId = (Integer) arg4TermIdResults[0];

      // resolve the creator: cache first, otherwise the facade when the term is atomic and has a
      // non-zero entry in newTermIdArray
      AbstractReifiedTerm creator = null;
      if (creatorTermId != null) {
        creator = reifiedTermCache.get(creatorTermId);
        if (creator == null) {
          final byte creatorTermType = termTypeArray[creatorTermId];
          if (creatorTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creatorTermId];
            if (newTermId != 0) {
              try {
                creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                LOGGER.info(" found " + creator);
                reifiedTermCache.put(creatorTermId, creator);
              } catch (final TexaiException ex) {
                throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
              }
            }
          } else {
            LOGGER.info(" not an atomic creator " + line);
          }
        }
      }

      // resolve the creation purpose the same way (a facade failure propagates unchanged)
      AbstractReifiedTerm creationPurpose = null;
      if (creationPurposeTermId != null) {
        creationPurpose = reifiedTermCache.get(creationPurposeTermId);
        if (creationPurpose == null) {
          final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
          if (creationPurposeTermType == Constants.ATOMIC) {
            final int newTermId = newTermIdArray[creationPurposeTermId];
            if (newTermId != 0) {
              creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
              LOGGER.info(" found " + creationPurpose);
              reifiedTermCache.put(creationPurposeTermId, creationPurpose);
            }
          } else {
            LOGGER.info(" not an atomic creation purpose " + line);
          }
        }
      }

      // creator and creation purpose are kept only as a pair; if either is missing, drop both
      if (creator != null && creationPurpose == null) {
        creator = null;
      }
      if (creator == null && creationPurpose != null) {
        creationPurpose = null;
      }

      assert termTypeArray[termId] == Constants.QUATERNARY_GAF : "invalid quaternary GAF at termId: " + termId + ", line: " + line;
      final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[contextTermId],
          newTermIdArray[contextTermId]);
      assert context != null : "context not found for MySQL context term id " + contextTermId;

      // default to the current time when the record carries no creation time point
      Date creationDate = new Date();
      if (creationTimePointTermId != null) {
        creationDate = new Date(timePointDictionary.get(creationTimePointTermId));
      }

      final AbstractReifiedTerm predicate = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
          termTypeArray[predicateTermId],
          newTermIdArray[predicateTermId]);
      assert predicate != null : "predicate not found for MySQL predicate term id " + predicateTermId;

      // For each argument, a TexaiException or AssertionError from the lookup is treated as
      // "argument not found": the record is logged and skipped.
      AbstractTerm arg1 = null;
      try {
        arg1 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg1TermId],
            newTermIdArray[arg1TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg1 not found for MySQL term id " + arg1TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg1 not found for MySQL term id " + arg1TermId);
        continue;
      }
      assert arg1 != null : "arg1 must not be null";

      AbstractTerm arg2 = null;
      try {
        arg2 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg2TermId],
            newTermIdArray[arg2TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg2 not found for MySQL term id " + arg2TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg2 not found for MySQL term id " + arg2TermId);
        continue;
      }
      assert arg2 != null : "arg2 must not be null";

      AbstractTerm arg3 = null;
      try {
        arg3 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg3TermId],
            newTermIdArray[arg3TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg3 not found for MySQL term id " + arg3TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg3 not found for MySQL term id " + arg3TermId);
        continue;
      }
      assert arg3 != null : "arg3 must not be null";

      AbstractTerm arg4 = null;
      try {
        arg4 = termFinderFacade.findTermByTermTypeAndId(
            termTypeArray[arg4TermId],
            newTermIdArray[arg4TermId]);
      } catch (final TexaiException ex) {
        LOGGER.info(" arg4 not found for MySQL term id " + arg4TermId);
        continue;
      } catch (final AssertionError ex) {
        LOGGER.info(" arg4 not found for MySQL term id " + arg4TermId);
        continue;
      }
      assert arg4 != null : "arg4 must not be null";

      final List<AbstractTerm> args = new ArrayList<AbstractTerm>(Constants.QUATERNARY_ARGS_SIZE);
      args.add(arg1);
      args.add(arg2);
      args.add(arg3);
      args.add(arg4);
      final QuaternaryGAF quaternaryGAF = termFinderFacade.findOrCreateQuaternaryGAFByConstituents(
          predicate,
          args,
          context,
          strength,
          generatedPhrase,
          creator,
          creationPurpose,
          creationDate);
      // log only the first few results as a sample
      if (nbrItemsProcessed < 10) {
        LOGGER.info(" " + quaternaryGAF);
      }
      newTermIdArray[termId] = quaternaryGAF.getTermId();
      nbrItemsProcessed++;
    }
  } catch (final FileNotFoundException ex) {
    throw new TexaiException(ex);
  } catch (final IOException ex) {
    throw new TexaiException(ex);
  } finally {
    // always release the file handle, including when a record fails to convert
    if (bufferedReader != null) {
      try {
        bufferedReader.close();
      } catch (final IOException ex) {
        // logged rather than thrown so that it cannot mask an exception already propagating
        LOGGER.warn("unable to close " + QUATERNARY_GAFS_PATH, ex);
      }
    }
  }
  // clamp the elapsed time to at least one millisecond to avoid dividing by zero
  final long elapsedMillis = Math.max(1L, System.currentTimeMillis() - beginTimeMillis);
  final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
  LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/** Converts the quintary GAFs.
 *
 * Does nothing when CONVERSION_STARTING_STEP is greater than 15. Otherwise reads
 * QUINTARY_GAFS_PATH one line at a time. Each line holds, comma separated and in this order:
 * the strength, the generated phrase, and the MySQL term ids of the context, creation purpose,
 * creator, creation time point, the GAF itself, the predicate and the five arguments.
 * For every line whose arguments can all be found, a quintary GAF is found or created through
 * the term finder facade and its new term id is recorded in newTermIdArray at the MySQL term id.
 * Lines having an argument that cannot be found are logged and skipped.
 *
 * The reader is closed even when a line fails part way through the file.
 *
 * @throws TexaiException if the input file cannot be found or read, or if a creator lookup fails
 */
private void convertQuintaryGAFs() {
    if (CONVERSION_STARTING_STEP > 15) {
        return;
    }
    LOGGER.info("converting quintary GAFs");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(QUINTARY_GAFS_PATH));
        try {
            String line;
            nextLine:
            while ((line = bufferedReader.readLine()) != null) {
                LOGGER.debug(line);

                // parse the two leading non-integer fields
                final Object[] strengthResults = parseDouble(line);
                final Double strength = (Double) strengthResults[0];
                String remainingLine = parseComma((String) strengthResults[1]);
                final Object[] generatedPhraseResults = parseString(remainingLine);
                final String generatedPhrase = (String) generatedPhraseResults[0];
                remainingLine = (String) generatedPhraseResults[1];

                // the remaining eleven fields are all term ids, each preceded by a comma
                final Integer[] termIds = new Integer[11];
                for (int i = 0; i < termIds.length; i++) {
                    remainingLine = parseComma(remainingLine);
                    final Object[] termIdResults = parseInteger(remainingLine);
                    termIds[i] = (Integer) termIdResults[0];
                    remainingLine = (String) termIdResults[1];
                }
                final Integer contextTermId = termIds[0];
                final Integer creationPurposeTermId = termIds[1];
                final Integer creatorTermId = termIds[2];
                final Integer creationTimePointTermId = termIds[3];
                final Integer termId = termIds[4];
                final Integer predicateTermId = termIds[5];
                final Integer[] argTermIds = {termIds[6], termIds[7], termIds[8], termIds[9], termIds[10]};

                // resolve the creator, consulting the cache keyed by MySQL term id first
                AbstractReifiedTerm creator = null;
                if (creatorTermId != null) {
                    creator = reifiedTermCache.get(creatorTermId);
                    if (creator == null) {
                        final byte creatorTermType = termTypeArray[creatorTermId];
                        if (creatorTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creatorTermId];
                            if (newTermId != 0) {
                                try {
                                    creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                                    LOGGER.info(" found " + creator);
                                    reifiedTermCache.put(creatorTermId, creator);
                                } catch (final TexaiException ex) {
                                    throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
                                }
                            }
                        } else {
                            LOGGER.info(" not an atomic creator " + line);
                        }
                    }
                }

                // resolve the creation purpose the same way
                AbstractReifiedTerm creationPurpose = null;
                if (creationPurposeTermId != null) {
                    creationPurpose = reifiedTermCache.get(creationPurposeTermId);
                    if (creationPurpose == null) {
                        final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
                        if (creationPurposeTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creationPurposeTermId];
                            if (newTermId != 0) {
                                creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
                                LOGGER.info(" found " + creationPurpose);
                                reifiedTermCache.put(creationPurposeTermId, creationPurpose);
                            }
                        } else {
                            LOGGER.info(" not an atomic creation purpose " + line);
                        }
                    }
                }

                // creator and creation purpose are only kept as a pair
                if (creator != null && creationPurpose == null) {
                    creator = null;
                }
                if (creator == null && creationPurpose != null) {
                    creationPurpose = null;
                }

                assert termTypeArray[termId] == Constants.QUINTARY_GAF : "invalid quintary GAF at termId: " + termId + ", line: " + line;
                final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
                        termTypeArray[contextTermId],
                        newTermIdArray[contextTermId]);
                assert context != null : "context not found for MySQL context term id " + contextTermId;

                // the creation date defaults to the current time when no time point is given
                Date creationDate = new Date();
                if (creationTimePointTermId != null) {
                    creationDate = new Date(timePointDictionary.get(creationTimePointTermId));
                }

                final AbstractReifiedTerm predicate = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
                        termTypeArray[predicateTermId],
                        newTermIdArray[predicateTermId]);
                assert predicate != null : "predicate not found for MySQL predicate term id " + predicateTermId;

                // look up arg1 .. arg5 in order; a missing argument skips the whole line
                final List<AbstractTerm> args = new ArrayList<AbstractTerm>(Constants.QUINTARY_ARGS_SIZE);
                for (int i = 0; i < argTermIds.length; i++) {
                    final Integer argTermId = argTermIds[i];
                    AbstractTerm arg = null;
                    try {
                        arg = termFinderFacade.findTermByTermTypeAndId(
                                termTypeArray[argTermId],
                                newTermIdArray[argTermId]);
                    } catch (final TexaiException ex) {
                        LOGGER.info(" arg" + (i + 1) + " not found for MySQL term id " + argTermId);
                        continue nextLine;
                    } catch (final AssertionError ex) {
                        LOGGER.info(" arg" + (i + 1) + " not found for MySQL term id " + argTermId);
                        continue nextLine;
                    }
                    // deliberately outside the try so that this assertion is not swallowed by the catch above
                    assert arg != null : "arg" + (i + 1) + " must not be null";
                    args.add(arg);
                }

                final QuintaryGAF quintaryGAF = termFinderFacade.findOrCreateQuintaryGAFByConstituents(
                        predicate,
                        args,
                        context,
                        strength,
                        generatedPhrase,
                        creator,
                        creationPurpose,
                        creationDate);
                if (nbrItemsProcessed < 10) {
                    LOGGER.info(" " + quintaryGAF);
                }
                newTermIdArray[termId] = quintaryGAF.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            // release the file handle on both the normal and the exceptional path
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero divisor when the step completes within the same millisecond
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/** Converts the binary GAFs.
 *
 * Does nothing when CONVERSION_STARTING_STEP is greater than 16. Otherwise reads
 * BINARY_GAFS_PATH one line at a time. Each line holds, comma separated and in this order:
 * the strength, the generated phrase, and the MySQL term ids of the context, creation purpose,
 * creator, creation time point, the GAF itself, the predicate and the two arguments.
 * Lines whose term id is not typed as a binary GAF are logged and skipped. For every other line
 * a binary GAF is created through the term finder facade and its new term id is recorded in
 * newTermIdArray at the MySQL term id.
 *
 * The reader is closed even when a line fails part way through the file.
 *
 * @throws TexaiException if the input file cannot be found or read, or if a creator lookup fails
 */
private void convertBinaryGAFs() {
    if (CONVERSION_STARTING_STEP > 16) {
        return;
    }
    LOGGER.info("converting binary GAFs");
    nbrItemsProcessed = 0;
    final long beginTimeMillis = System.currentTimeMillis();
    try {
        final BufferedReader bufferedReader = new BufferedReader(new FileReader(BINARY_GAFS_PATH));
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                LOGGER.debug(line);

                // parse the two leading non-integer fields
                final Object[] strengthResults = parseDouble(line);
                final Double strength = (Double) strengthResults[0];
                String remainingLine = parseComma((String) strengthResults[1]);
                final Object[] generatedPhraseResults = parseString(remainingLine);
                final String generatedPhrase = (String) generatedPhraseResults[0];
                remainingLine = (String) generatedPhraseResults[1];

                // the remaining eight fields are all term ids, each preceded by a comma
                final Integer[] termIds = new Integer[8];
                for (int i = 0; i < termIds.length; i++) {
                    remainingLine = parseComma(remainingLine);
                    final Object[] termIdResults = parseInteger(remainingLine);
                    termIds[i] = (Integer) termIdResults[0];
                    remainingLine = (String) termIdResults[1];
                }
                final Integer contextTermId = termIds[0];
                final Integer creationPurposeTermId = termIds[1];
                final Integer creatorTermId = termIds[2];
                final Integer creationTimePointTermId = termIds[3];
                final Integer termId = termIds[4];
                final Integer predicateTermId = termIds[5];
                final Integer arg1TermId = termIds[6];
                final Integer arg2TermId = termIds[7];

                // resolve the creator, consulting the cache keyed by MySQL term id first
                AbstractReifiedTerm creator = null;
                if (creatorTermId != null) {
                    creator = reifiedTermCache.get(creatorTermId);
                    if (creator == null) {
                        final byte creatorTermType = termTypeArray[creatorTermId];
                        if (creatorTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creatorTermId];
                            if (newTermId != 0) {
                                try {
                                    creator = termFinderFacade.findAtomicTermByTermId(newTermId);
                                    LOGGER.info(" found " + creator);
                                    reifiedTermCache.put(creatorTermId, creator);
                                } catch (final TexaiException ex) {
                                    throw new TexaiException("creator not found for " + creatorTermId + " in " + line);
                                }
                            }
                        } else {
                            LOGGER.info(" not an atomic creator " + line);
                        }
                    }
                }

                // resolve the creation purpose the same way
                AbstractReifiedTerm creationPurpose = null;
                if (creationPurposeTermId != null) {
                    creationPurpose = reifiedTermCache.get(creationPurposeTermId);
                    if (creationPurpose == null) {
                        final byte creationPurposeTermType = termTypeArray[creationPurposeTermId];
                        if (creationPurposeTermType == Constants.ATOMIC) {
                            final int newTermId = newTermIdArray[creationPurposeTermId];
                            if (newTermId != 0) {
                                creationPurpose = termFinderFacade.findAtomicTermByTermId(newTermId);
                                LOGGER.info(" found " + creationPurpose);
                                reifiedTermCache.put(creationPurposeTermId, creationPurpose);
                            }
                        } else {
                            LOGGER.info(" not an atomic creation purpose " + line);
                        }
                    }
                }

                // creator and creation purpose are only kept as a pair
                if (creator != null && creationPurpose == null) {
                    creator = null;
                }
                if (creator == null && creationPurpose != null) {
                    creationPurpose = null;
                }

                if (termTypeArray[termId] != Constants.BINARY_GAF) {
                    LOGGER.info("invalid binary GAF at termId: " + termId + ", line: " + line);
                    continue;
                }
                final AbstractReifiedTerm context = (AbstractReifiedTerm) termFinderFacade.findTermByTermTypeAndId(
                        termTypeArray[contextTermId],
                        newTermIdArray[contextTermId]);
                assert context != null : "context not found for MySQL context term id " + contextTermId;

                // the creation date defaults to the current time when no time point is given
                Date creationDate = new Date();
                if (creationTimePointTermId != null) {
                    creationDate = new Date(timePointDictionary.get(creationTimePointTermId));
                }

                // The predicate and both arguments are represented by stubs built from the term
                // type and new term id; no facade lookup is made for them here. Lookup-based code
                // (findTermByTermTypeAndId with log-and-skip on failure) previously stood here in
                // disabled form. NOTE(review): presumably stubs avoid a lookup per row - confirm.
                final AbstractReifiedTerm predicate = new StubReifiedTerm(termTypeArray[predicateTermId], newTermIdArray[predicateTermId]);
                final AbstractTerm arg1 = new StubTerm(termTypeArray[arg1TermId], newTermIdArray[arg1TermId]);
                final AbstractTerm arg2 = new StubTerm(termTypeArray[arg2TermId], newTermIdArray[arg2TermId]);
                final List<AbstractTerm> args = new ArrayList<AbstractTerm>(Constants.BINARY_ARGS_SIZE);
                args.add(arg1);
                args.add(arg2);

                // NOTE(review): the meaning of the boolean argument (false) is not visible in this file section
                final BinaryGAF binaryGAF = termFinderFacade.createBinaryGAFByConstituents(
                        predicate,
                        args,
                        context,
                        strength,
                        generatedPhrase,
                        false,
                        creator,
                        creationPurpose,
                        creationDate);
                // log the first few items and then one item per 100000 as a progress indicator
                if ((nbrItemsProcessed < 10) || (nbrItemsProcessed % 100000 == 0)) {
                    LOGGER.info(" " + nbrItemsProcessed + " " + binaryGAF);
                }
                newTermIdArray[termId] = binaryGAF.getTermId();
                nbrItemsProcessed++;
            }
        } finally {
            // release the file handle on both the normal and the exceptional path
            bufferedReader.close();
        }
    } catch (final FileNotFoundException ex) {
        throw new TexaiException(ex);
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    final long endTimeMillis = System.currentTimeMillis();
    // guard against a zero divisor when the step completes within the same millisecond
    final long elapsedMillis = Math.max(1L, endTimeMillis - beginTimeMillis);
    final long nbrPerSecond = nbrItemsProcessed * 1000L / elapsedMillis;
    LOGGER.info(nbrItemsProcessed + " items processed, " + nbrPerSecond + " per second");
}
/** Converts the formulas phase 2 - convert the term list.
 *
 * This method currently has an empty body and therefore performs no work when called.
 */
private void convertFormulasPhase2() {
    // NOTE(review): no implementation is present - confirm whether this is a placeholder
}
/** Finalizes this application.
 *
 * First writes the term id cross reference array to NEW_TERM_ID_ARRAY_PATH, one line per array
 * index in the form "index,newTermId". Then closes the entity store, checkpoints the
 * environment, cleans its log and closes it.
 *
 * @throws TexaiException if writing the cross reference file fails or a database operation fails
 */
private void finalization() {
    try {
        LOGGER.info("storing the term id cross reference array");
        final BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(NEW_TERM_ID_ARRAY_PATH));
        try {
            for (int i = 0; i < newTermIdArray.length; i++) {
                bufferedWriter.write(i + "," + newTermIdArray[i]);
                bufferedWriter.newLine();
            }
        } finally {
            // close (and thereby flush) the writer even when a write fails
            bufferedWriter.close();
        }
    } catch (final IOException ex) {
        throw new TexaiException(ex);
    }
    try {
        entityStore.close();
        LOGGER.info("checkpointing");
        final CheckpointConfig checkpointConfig = new CheckpointConfig();
        environment.checkpoint(checkpointConfig);
        LOGGER.info("cleaning the log");
        environment.cleanLog();
        environment.close();
    } catch (final DatabaseException ex) {
        throw new TexaiException(ex);
    }
}
/** Parses the next string from the given line.
 *
 * The field is either the NULL marker \N, or a double-quoted string in which a backslash causes
 * the character that follows it to be copied literally (so an escaped quote does not end the
 * string). The NULL marker is accepted both when followed by a comma and when it is the last
 * field on the line.
 *
 * @param line the given line of comma-separated-values
 * @return an array of two objects, the first of which is the parsed string (null for the NULL
 * marker), and the second of which is the remaining line.
 * @throws TexaiException if the quoted string is not terminated before the end of the line
 */
private Object[] parseString(final String line) {
    //Preconditions
    assert line != null : "line must not be null";
    assert line.startsWith("\"") || line.startsWith("\\N") : "invalid string " + line;
    assert line.length() > 1 : "line must not be an empty string";

    // the NULL marker: nothing is left when it is the final field, otherwise the comma remains
    if (line.startsWith("\\N,") || "\\N".equals(line)) {
        final Object[] results = {null, line.substring(2)};
        return results;
    }
    try {
        int index = 0;
        assert line.charAt(index) == '"' : "invalid string " + line;
        index++;
        stringBuilder.setLength(0);
        while (true) {
            char ch = line.charAt(index);
            if (ch == '\\') {
                // escaped character: skip the backslash and take the next character as is
                index++;
                ch = line.charAt(index);
            } else if (ch == '"') {
                // unescaped quote terminates the string
                break;
            }
            stringBuilder.append(ch);
            index++;
        }
        // index is at the closing quote, so the remaining line starts just after it
        final Object[] results = {stringBuilder.toString(), line.substring(index + 1)};
        return results;
    } catch (final StringIndexOutOfBoundsException ex) {
        // ran off the end of the line without finding the closing quote
        throw new TexaiException(ex + "\n" + line);
    }
}
/** Consumes the comma that begins the given line.
 *
 * @param line the given line of comma-separated-values, which must begin with a comma that is
 * followed by at least one more character
 * @return the text of the line that follows the leading comma
 */
private String parseComma(final String line) {
    //Preconditions
    assert line != null : "line must not be null";
    assert line.startsWith(",") : "invalid string " + line;
    assert line.length() > 1 : "line must not be an empty string";

    final int firstIndexAfterComma = 1;
    final String textAfterComma = line.substring(firstIndexAfterComma);
    return textAfterComma;
}
/** Parses the next integer field from the given line.
 *
 * The field extends up to, but does not include, the first comma, or to the end of the line
 * when there is no comma. The NULL marker \N yields a null value.
 *
 * @param line the given line of comma-separated-values
 * @return an array of two objects, the first of which is the parsed Integer (null for the NULL
 * marker), and the second of which is the remaining line, beginning at the comma or empty.
 */
private Object[] parseInteger(final String line) {
    //Preconditions
    assert line != null : "line must not be null";
    assert line.length() > 0 : "line must not be an empty string";

    final int commaIndex = line.indexOf(',');
    final int fieldEnd = (commaIndex < 0) ? line.length() : commaIndex;
    // the shared buffer is reused so that it ends up holding exactly the field text
    stringBuilder.setLength(0);
    stringBuilder.append(line, 0, fieldEnd);
    final String field = stringBuilder.toString();
    final Integer value;
    if ("\\N".equals(field)) {
        value = null;
    } else {
        value = Integer.valueOf(Integer.parseInt(field));
    }
    return new Object[] {value, line.substring(fieldEnd)};
}
/** Parses the next long field from the given line.
 *
 * The field extends up to, but does not include, the first comma, or to the end of the line
 * when there is no comma.
 *
 * @param line the given line of comma-separated-values
 * @return an array of two objects, the first of which is the parsed Long, and
 * the second of which is the remaining line, beginning at the comma or empty.
 */
private Object[] parseLong(final String line) {
    //Preconditions
    assert line != null : "line must not be null";
    assert line.length() > 0 : "line must not be an empty string";

    final int commaIndex = line.indexOf(',');
    final int fieldEnd = (commaIndex < 0) ? line.length() : commaIndex;
    // the shared buffer is reused so that it ends up holding exactly the field text
    stringBuilder.setLength(0);
    stringBuilder.append(line, 0, fieldEnd);
    final String tail = line.substring(fieldEnd);
    final Long value = Long.valueOf(Long.parseLong(stringBuilder.toString()));
    return new Object[] {value, tail};
}
/** Parses the next double field from the given line.
 *
 * The field extends up to, but does not include, the first comma, or to the end of the line
 * when there is no comma.
 *
 * @param line the given line of comma-separated-values
 * @return an array of two objects, the first of which is the parsed Double, and
 * the second of which is the remaining line, beginning at the comma or empty.
 */
private Object[] parseDouble(final String line) {
    //Preconditions
    assert line != null : "line must not be null";
    assert line.length() > 0 : "line must not be an empty string";

    final int commaIndex = line.indexOf(',');
    final int fieldEnd = (commaIndex < 0) ? line.length() : commaIndex;
    // the shared buffer is reused so that it ends up holding exactly the field text
    stringBuilder.setLength(0);
    stringBuilder.append(line, 0, fieldEnd);
    final String tail = line.substring(fieldEnd);
    final Double value = Double.valueOf(Double.parseDouble(stringBuilder.toString()));
    return new Object[] {value, tail};
}
/** Executes this application.
 *
 * Runs the conversion steps in a fixed order and then finalizes. The ternary GAF conversion is
 * run a second time after the binary GAFs. Any exception is logged and its stack trace is
 * printed to standard error, and the remaining steps are not run. The JVM exits with status 0
 * when every step completed and with status 1 when an exception stopped the conversion.
 *
 * @param args the command line arguments (unused)
 */
public static void main(final String[] args) {
    final ConvertMySQLToBerkeleyDB convertMySQLToBerkeleyDB = new ConvertMySQLToBerkeleyDB();
    // becomes non-zero when a step fails so that the caller can detect the failure
    int exitStatus = 0;
    try {
        convertMySQLToBerkeleyDB.initialize();
        convertMySQLToBerkeleyDB.convertAbstractTerms();
        convertMySQLToBerkeleyDB.convertTimePoints();
        convertMySQLToBerkeleyDB.convertSymbols();
        convertMySQLToBerkeleyDB.convertVariables();
        convertMySQLToBerkeleyDB.convertLongs();
        convertMySQLToBerkeleyDB.convertDoubles();
        convertMySQLToBerkeleyDB.convertStrings();
        convertMySQLToBerkeleyDB.convertAtomicTerms();
        convertMySQLToBerkeleyDB.convertFormulasPhase1();
        convertMySQLToBerkeleyDB.convertNonAtomicTerms();
        convertMySQLToBerkeleyDB.convertRules();
        convertMySQLToBerkeleyDB.convertUnaryGAFs();
        convertMySQLToBerkeleyDB.convertTernaryGAFs();
        convertMySQLToBerkeleyDB.convertQuaternaryGAFs();
        convertMySQLToBerkeleyDB.convertQuintaryGAFs();
        convertMySQLToBerkeleyDB.convertBinaryGAFs();
        // repeat ternary GAF conversion to pick up binary GAF terms
        convertMySQLToBerkeleyDB.convertTernaryGAFs();
        convertMySQLToBerkeleyDB.convertFormulasPhase2();
        convertMySQLToBerkeleyDB.finalization();
    } catch (final TexaiException ex) {
        LOGGER.error(ex);
        ex.printStackTrace(System.err);
        exitStatus = 1;
    } catch (final Exception ex) {
        LOGGER.error(ex);
        ex.printStackTrace(System.err);
        exitStatus = 1;
    }
    System.exit(exitStatus);
}
}
See more files for this project here