Code Search for Developers
 
 
  

ValidateLexiconEntities.java from Texai at Krugle


Show ValidateLexiconEntities.java syntax highlighted

/*
 * ValidateLexiconEntities.java
 *
 * Created on August 29, 2007, 12:26 PM
 *
 * Description: Validates the lexicon repository contents.
 *
 * Copyright (C) August 29, 2007 Stephen L. Reed.
 *
 * This program is free software; you can redistribute it and/or modify it under the terms
 * of the GNU General Public License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this program;
 * if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package org.texai.lexicon.domainEntity;

import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import net.jcip.annotations.GuardedBy;
import net.jcip.annotations.Immutable;
import net.jcip.annotations.NotThreadSafe;
import net.sf.ehcache.CacheManager;
import org.apache.log4j.Logger;
import org.openrdf.OpenRDFException;
import org.openrdf.model.URI;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.nativerdf.NativeStore;
import org.texai.grammar.domainEntity.AlternativeConstruction;
import org.texai.kb.CacheInitializer;
import org.texai.kb.Constants;
import org.texai.kb.persistence.RDFEntityManager;
import org.texai.util.TexaiException;

/** Validates the lexicon repository contents.
 *
 * <p>For each lexicon entity class, the URIs of its instances are queried from the Sesame
 * repository, and then {@link #NBR_THREADS} worker runnables load and check the entities, drawing
 * their work from a shared iterator.
 *
 * <p>Instances are not safe for concurrent use by multiple callers; the only concurrency is the
 * internal worker pool, whose shared state is guarded as annotated on the fields below.
 *
 * @author reed
 */
@NotThreadSafe
public class ValidateLexiconEntities {

  /** the number of validation threads */
  private static final int NBR_THREADS = 2;

  /** the logger */
  private static final Logger LOGGER = Logger.getLogger(ValidateLexiconEntities.class);

  /** the class URI of the TexaiEnglishWord entities */
  private static final String TEXAI_ENGLISH_WORD_CLASS_URI =
          "http://texai.org/texai/org.texai.lexicon.domainEntity.TexaiEnglishWord";

  /** the class URI of the TexaiEnglishWordForm entities */
  private static final String TEXAI_ENGLISH_WORD_FORM_CLASS_URI =
          "http://texai.org/texai/org.texai.lexicon.domainEntity.TexaiEnglishWordForm";

  /** the class URI of the TexaiEnglishWordSense entities */
  private static final String TEXAI_ENGLISH_WORD_SENSE_CLASS_URI =
          "http://texai.org/texai/org.texai.lexicon.domainEntity.TexaiEnglishWordSense";

  /** the class URI of the TexaiSamplePhrase entities */
  private static final String TEXAI_SAMPLE_PHRASE_CLASS_URI =
          "http://texai.org/texai/org.texai.lexicon.domainEntity.TexaiSamplePhrase";

  /** the Sesame repository */
  private Repository repository;

  /** the executor */
  private final ExecutorService executor;

  /** the set of URIs to validate */
  private final Set<URI> urisToValidate = new HashSet<URI>();

  /** the iterator of URIs to validate, shared by the worker runnables */
  @GuardedBy("itself")
  private Iterator<URI> urisToValidate_iter;

  /** the number of validated entities so far */
  @GuardedBy("nbrURIsValidated_lock")
  private volatile int nbrURIsValidated = 0;

  /** the lock for the number of validated entities so far */
  private final Object nbrURIsValidated_lock = new Object();

  /** the class URIs for entities to validate, in the order in which they are processed */
  private final String[] classURIStrings = {
    TEXAI_ENGLISH_WORD_CLASS_URI,
    TEXAI_ENGLISH_WORD_FORM_CLASS_URI,
    TEXAI_ENGLISH_WORD_SENSE_CLASS_URI,
    TEXAI_SAMPLE_PHRASE_CLASS_URI
  };

  /**
   * Creates a new instance of ValidateLexiconEntities, with a fixed pool of NBR_THREADS worker threads.
   */
  public ValidateLexiconEntities() {
    executor = Executors.newFixedThreadPool(NBR_THREADS);
  }

  /** Initializes this application: the caches, assertion checking and the Sesame repository.
   *
   * @throws TexaiException if the repository cannot be initialized
   */
  public void initialize() {
    // NOTE(review): the caches are initialized both before and after assertions are enabled, as in
    // the original code; the second call looks redundant - confirm that it can be dropped.
    CacheInitializer.initializeCaches();
    getClass().getClassLoader().setDefaultAssertionStatus(true);   // optional
    CacheInitializer.initializeCaches();
    // alternative location: <user.home>/.aduna/openrdf-sesame/repositories/TexaiEnglishLexicon
    final File dataDirectory = new File("/mnt/tmpfs/repositories/TexaiEnglishLexicon");
    LOGGER.info("accessing the Sesame2 repository in " + dataDirectory.toString());
    final String indices = "spoc,posc";
    repository = new SailRepository(new NativeStore(dataDirectory, indices));
    try {
      repository.initialize();
    } catch (final RepositoryException ex) {
      throw new TexaiException(ex);
    }
  }

  /** Validates the lexicon RDF entities, one entity class at a time. */
  public void validate() {
    for (final String classURIString : classURIStrings) {
      validateRDFEntities(classURIString);
    }
  }

  /** Validates the RDF entities having the given class URI.
   *
   * <p>The entity URIs are first gathered by a SeRQL query, and then validated in parallel by
   * NBR_THREADS runnables. This method blocks until every runnable has finished.
   *
   * @param classURIString the class URI
   * @throws TexaiException if the query fails or the calling thread is interrupted while waiting
   */
  private void validateRDFEntities(final String classURIString) {
    LOGGER.info("");
    LOGGER.info("querying the entity URIs having class URI " + classURIString);
    urisToValidate.clear();
    try {
      final RepositoryConnection repositoryConnection = repository.getConnection();
      try {
        final String queryString =
                "SELECT s FROM {s} rdf:type {<" + classURIString + ">}";
        LOGGER.info("query: " + queryString);
        final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
        final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
        try {
          while (tupleQueryResult.hasNext()) {
            urisToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
          }
        } finally {
          // release the query result even when iterating over it fails
          tupleQueryResult.close();
        }
      } finally {
        // release the connection even when the query fails
        if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("closing the query repository connection");
        }
        repositoryConnection.close();
      }
    } catch (final OpenRDFException ex) {
      // the common supertype of the repository, malformed query and query evaluation exceptions
      throw new TexaiException(ex);
    }
    if (urisToValidate.isEmpty()) {
      LOGGER.info("*** no entity URIs selected " + classURIString);
      return;
    }

    nbrURIsValidated = 0;
    final int urisToValidate_size = urisToValidate.size();
    LOGGER.info("found " + urisToValidate_size + " entity URIs");
    urisToValidate_iter = urisToValidate.iterator();
    final long startMillis = System.currentTimeMillis();
    final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
    for (int i = 0; i < NBR_THREADS; i++) {
      executor.execute(new EntityValidationRunnable(classURIString, doneSignal, i + 1));
    }
    try {
      doneSignal.await();
    } catch (final InterruptedException ex) {
      // restore the interrupt status so that callers further up the stack can observe it
      Thread.currentThread().interrupt();
      throw new TexaiException(ex);
    }
    double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
    if (secondsDuration == 0) {
      // avoid dividing by zero when the validation completed within the clock resolution
      secondsDuration = 1;
    }
    LOGGER.info("validated " + urisToValidate_size + " at the rate of " + urisToValidate_size /  secondsDuration + " per second");
  }

  /** A parallel runnable that validates entity URIs which it obtains from the shared iterator. */
  @Immutable
  class EntityValidationRunnable implements Runnable {

    /** the entity class URI string */
    private final String classURIString;

    /** the count down latch that synchronizes the calling thread */
    private final CountDownLatch doneSignal;

    /** the thread id */
    private final int id;

    /** Constructs a new EntityValidationRunnable instance.
     *
     * @param classURIString the entity class URI string
     * @param doneSignal the count down latch that synchronizes the calling thread
     * @param id the identification for this runnable
     */
    public EntityValidationRunnable(final String classURIString, final CountDownLatch doneSignal, final int id) {
      //Preconditions
      assert doneSignal != null : "doneSignal must not be null";
      assert classURIString != null : "classURIString must not be null";
      assert !classURIString.isEmpty() : "classURIString must not be empty";

      this.classURIString = classURIString;
      this.doneSignal = doneSignal;
      this.id = id;
    }

    /** Validates entities taken from the shared iterator until it is exhausted, and then signals
     * the count down latch.  The latch is signalled even when validation fails, so that the
     * thread awaiting it cannot block forever.
     */
    public void run() {
      RDFEntityManager rdfEntityManager = null;
      try {
        LOGGER.info("starting " + id);
        Thread.currentThread().setName("validator " + id);
        rdfEntityManager = new RDFEntityManager(repository);
        int nbrURIsProcessed = 0;
        URI uri = getNextEntityURIToValidate();
        while (uri != null) {
          validateEntity(rdfEntityManager, uri, countValidatedURI());
          nbrURIsProcessed++;
          uri = getNextEntityURIToValidate();
        }
        LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " entity URIs");
      } catch (final Exception ex) {
        LOGGER.error(ex.getMessage(), ex);
      } finally {
        try {
          if (rdfEntityManager != null) {
            rdfEntityManager.close();
          }
        } finally {
          // always release the waiting thread, including after an exception or assertion error
          doneSignal.countDown();
        }
      }
    }

    /** Counts one more validated entity, and resets the connected RDF entities cache at every
     * 2500th entity.
     *
     * @return whether the entity being counted should be logged, which is every 2500th one
     */
    private boolean countValidatedURI() {
      synchronized (nbrURIsValidated_lock) {
        nbrURIsValidated++;
        if (nbrURIsValidated % 2500 == 0) {
          CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
          return true;
        }
        return false;
      }
    }

    /** Dispatches to the validation method for this runnable's entity class.  A class URI
     * that is not recognized is ignored.
     *
     * @param rdfEntityManager the RDF entity manager
     * @param uri the RDF entity URI
     * @param isLogged the indicator whether to log this entity
     */
    private void validateEntity(final RDFEntityManager rdfEntityManager, final URI uri, final boolean isLogged) {
      if (TEXAI_ENGLISH_WORD_CLASS_URI.equals(classURIString)) {
        validateTexaiEnglishWord(rdfEntityManager, uri, isLogged);
      } else if (TEXAI_ENGLISH_WORD_FORM_CLASS_URI.equals(classURIString)) {
        validateTexaiEnglishWordForm(rdfEntityManager, uri, isLogged);
      } else if (TEXAI_ENGLISH_WORD_SENSE_CLASS_URI.equals(classURIString)) {
        validateTexaiEnglishWordSense(rdfEntityManager, uri, isLogged);
      } else if (TEXAI_SAMPLE_PHRASE_CLASS_URI.equals(classURIString)) {
        validateTexaiSamplePhrase(rdfEntityManager, uri, isLogged);
      }
    }
  }

  /** Validates the TexaiEnglishWord entity, currently only that its id equals the given URI.
   *
   * @param rdfEntityManager the RDF entity manager
   * @param uri the RDF entity URI
   * @param isLogged the indicator whether to log this entity
   */
  private void validateTexaiEnglishWord(final RDFEntityManager rdfEntityManager, final URI uri, final boolean isLogged) {
    //Preconditions
    assert rdfEntityManager != null : "rdfEntityManger must not be null";
    assert uri != null : "uri must not be null";

    final TexaiEnglishWord texaiEnglishWord = (TexaiEnglishWord) rdfEntityManager.find(TexaiEnglishWord.class, uri);
    if (isLogged) {
      LOGGER.info(texaiEnglishWord.getLemma() + " by thread " + Thread.currentThread().getName());
    }

    // the id
    if (!texaiEnglishWord.getId().equals(uri)) {
      LOGGER.warn(texaiEnglishWord + " ids are not equal");
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("id OK for " + texaiEnglishWord.getId());
    }

    //TODO validate the remaining fields
  }

  /** Validates the TexaiEnglishWordForm entity, currently only that its id equals the given URI.
   *
   * @param rdfEntityManager the RDF entity manager
   * @param uri the RDF entity URI
   * @param isLogged the indicator whether to log this entity
   */
  private void validateTexaiEnglishWordForm(final RDFEntityManager rdfEntityManager, final URI uri, final boolean isLogged) {
    //Preconditions
    assert rdfEntityManager != null : "rdfEntityManger must not be null";
    assert uri != null : "uri must not be null";

    final TexaiEnglishWordForm texaiEnglishWordForm = (TexaiEnglishWordForm) rdfEntityManager.find(TexaiEnglishWordForm.class, uri);
    if (isLogged) {
      LOGGER.info(texaiEnglishWordForm.getWordForm() + " by thread " + Thread.currentThread().getName());
    }

    // the id
    if (!texaiEnglishWordForm.getId().equals(uri)) {
      LOGGER.warn(texaiEnglishWordForm + " ids are not equal");
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("id OK for " + texaiEnglishWordForm.getId());
    }

    //TODO validate the remaining fields
  }

  /** Validates the TexaiEnglishWordSense entity, currently only that its id equals the given URI.
   *
   * @param rdfEntityManager the RDF entity manager
   * @param uri the RDF entity URI
   * @param isLogged the indicator whether to log this entity
   */
  private void validateTexaiEnglishWordSense(final RDFEntityManager rdfEntityManager, final URI uri, final boolean isLogged) {
    //Preconditions
    assert rdfEntityManager != null : "rdfEntityManger must not be null";
    assert uri != null : "uri must not be null";

    final TexaiEnglishWordSense texaiEnglishWordSense = (TexaiEnglishWordSense) rdfEntityManager.find(TexaiEnglishWordSense.class, uri);
    if (isLogged) {
      LOGGER.info(texaiEnglishWordSense.toString() + " by thread " + Thread.currentThread().getName());
    }

    // the id
    if (!texaiEnglishWordSense.getId().equals(uri)) {
      LOGGER.warn(texaiEnglishWordSense + " ids are not equal");
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("id OK for " + texaiEnglishWordSense.getId());
    }

    //TODO validate the remaining fields
  }

  /** Validates the TexaiSamplePhrase entity, currently only that its id equals the given URI.
   *
   * @param rdfEntityManager the RDF entity manager
   * @param uri the RDF entity URI
   * @param isLogged the indicator whether to log this entity
   */
  private void validateTexaiSamplePhrase(final RDFEntityManager rdfEntityManager, final URI uri, final boolean isLogged) {
    //Preconditions
    assert rdfEntityManager != null : "rdfEntityManger must not be null";
    assert uri != null : "uri must not be null";

    final TexaiSamplePhrase texaiSamplePhrase = (TexaiSamplePhrase) rdfEntityManager.find(TexaiSamplePhrase.class, uri);
    if (isLogged) {
      LOGGER.info(texaiSamplePhrase.toString() + " by thread " + Thread.currentThread().getName());
    }

    // the id
    if (!texaiSamplePhrase.getId().equals(uri)) {
      LOGGER.warn(texaiSamplePhrase + " ids are not equal");
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("id OK for " + texaiSamplePhrase.getId());
    }

    //TODO validate the remaining fields
  }

  /** Gets the next entity URI to validate from the shared iterator.
   *
   * @return the next entity URI to validate, or null when done
   */
  private URI getNextEntityURIToValidate() {
    synchronized (urisToValidate_iter) {
      return urisToValidate_iter.hasNext() ? urisToValidate_iter.next() : null;
    }
  }


  /** Finalizes this application by shutting down the executor, the cache manager and the repository.
   *
   * @throws TexaiException if the repository cannot be shut down
   */
  public void finalization() {
    executor.shutdown();
    CacheManager.getInstance().shutdown();
    if (repository != null) {
      // the repository is absent when initialize() was never called
      try {
        repository.shutDown();
      } catch (final RepositoryException ex) {
        throw new TexaiException(ex);
      }
    }
    LOGGER.info("ValidateLexiconEntities completed");
  }

  /** Executes this application.
   *
   * @param args the command line arguments (unused)
   */
  public static void main(final String[] args) {
    final ValidateLexiconEntities validateLexiconEntities = new ValidateLexiconEntities();
    validateLexiconEntities.initialize();
    try {
      validateLexiconEntities.validate();
    } finally {
      // release the executor, caches and repository even when validation fails
      validateLexiconEntities.finalization();
    }
    System.exit(0);
  }
}




See more files for this project here

Texai

Texai is a chatbot that intelligently seeks to acquire knowledge and friendly behaviors.

Project homepage: http://sourceforge.net/projects/texai
Programming language(s): Java,Shell Script,XML
License: other

  TexaiEnglishWord.java
  TexaiEnglishWordForm.java
  TexaiEnglishWordSense.java
  TexaiSamplePhrase.java
  ValidateLexiconEntities.java