// ValidateWordNetEntities.java from Texai at Krugle
// Show ValidateWordNetEntities.java syntax highlighted
/*
* ValidateWordNetEntities.java
*
* Created on August 27, 2007, 11:37 AM
*
* Description: Validates the WordNet RDF entities that are stored in the Sesame repository.
*
* Copyright (C) August 27, 2007 Stephen L. Reed.
*
* This program is free software; you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with this program;
* if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.texai.wordnet.domain.entity;
import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import net.jcip.annotations.GuardedBy;
import net.jcip.annotations.Immutable;
import net.jcip.annotations.NotThreadSafe;
import net.sf.ehcache.CacheManager;
import org.apache.log4j.Logger;
import org.openrdf.OpenRDFException;
import org.openrdf.model.URI;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.nativerdf.NativeStore;
import org.texai.kb.CacheInitializer;
import org.texai.kb.Constants;
import org.texai.kb.persistence.RDFEntityManager;
import org.texai.util.TexaiException;
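/**
* Validates the WordNet RDF entities stored in a Sesame repository, one entity type at a time,
* using a fixed-size pool of validation threads.
*
* @author reed
*/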
@NotThreadSafe
public class ValidateWordNetEntities {
/** the number of validation threads */
private static final int NBR_THREADS = 2;
/** the logger, which is static because it is keyed only by this class and holds no instance state */
private static final Logger LOGGER = Logger.getLogger(ValidateWordNetEntities.class);
/** the Sesame repository, which is assigned by initialize() */
private Repository repository;
/** the executor that runs the validation runnables */
private final ExecutorService executor;

/** the set of WordNet synset URIs to validate */
private final Set<URI> wordNetSynsetURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet synset URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetSynsetURIsToValidate_iter;
/** the set of validated WordNet synset URIs */
@GuardedBy("itself")
private final HashSet<URI> validatedWordNetSynsets = new HashSet<URI>();
/** the number of validated WordNet synsets */
@GuardedBy("validatedWordNetSynsets")
private volatile int nbrOfValidatedWordNetSynsets = 0;

/** the set of WordNet word sense URIs to validate */
private final Set<URI> wordNetWordSenseURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet word sense URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetWordSenseURIsToValidate_iter;
/** the set of validated WordNet word sense URIs */
@GuardedBy("itself")
private final HashSet<URI> validatedWordNetWordSenses = new HashSet<URI>();
/** the number of validated WordNet word senses */
@GuardedBy("validatedWordNetWordSenses")
private volatile int nbrOfValidatedWordNetWordSenses = 0;

/** the set of WordNet cased English word URIs to validate */
private final Set<URI> wordNetCasedEnglishWordURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet cased English word URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetCasedEnglishWordURIsToValidate_iter;
/** the number of validated WordNet cased English words */
@GuardedBy("wordNetCasedEnglishWordURIsToValidate")
private volatile int nbrOfValidatedWordNetCasedEnglishWords = 0;

/** the set of WordNet category URIs to validate */
private final Set<URI> wordNetCategoryURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet category URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetCategoryURIsToValidate_iter;
/** the number of validated WordNet categories */
@GuardedBy("wordNetCategoryURIsToValidate")
private volatile int nbrOfValidatedWordNetCategories = 0;

/** the set of WordNet English word URIs to validate */
private final Set<URI> wordNetEnglishWordURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet English word URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetEnglishWordURIsToValidate_iter;
/** the number of validated WordNet English words */
@GuardedBy("wordNetEnglishWordURIsToValidate")
private volatile int nbrOfValidatedWordNetEnglishWords = 0;

/** the set of WordNet English word morphological variation URIs to validate */
private final Set<URI> wordNetEnglishWordMorphURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet English word morphological variation URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetEnglishWordMorphURIsToValidate_iter;
/** the number of validated WordNet English word morphological variations */
@GuardedBy("wordNetEnglishWordMorphURIsToValidate")
private volatile int nbrOfValidatedWordNetEnglishWordMorphs = 0;

/** the set of WordNet sample phrase item URIs to validate */
private final Set<URI> wordNetSamplePhraseItemURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet sample phrase item URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetSamplePhraseItemURIsToValidate_iter;
/** the number of validated WordNet sample phrase items */
// NOTE(review): the guard previously named a non-existent field; it now follows the naming used by
// the other counters - confirm against the lock actually taken where this counter is incremented.
@GuardedBy("wordNetSamplePhraseItemURIsToValidate")
private volatile int nbrOfValidatedWordNetSamplePhraseItems = 0;

/** the set of WordNet sentence pattern URIs to validate */
private final Set<URI> wordNetSentencePatternURIsToValidate = new HashSet<URI>();
/** the iterator of WordNet sentence pattern URIs to validate */
@GuardedBy("itself")
private Iterator<URI> wordNetSentencePatternURIsToValidate_iter;
/** the number of validated WordNet sentence patterns */
// NOTE(review): the guard previously named a non-existent field; it now follows the naming used by
// the other counters - confirm against the lock actually taken where this counter is incremented.
@GuardedBy("wordNetSentencePatternURIsToValidate")
private volatile int nbrOfValidatedWordNetSentencePatterns = 0;
/** Creates a new instance of ValidateWordNetEntities. */
public ValidateWordNetEntities() {
executor = Executors.newFixedThreadPool(NBR_THREADS);
}
/** Initializes this application by initializing the caches and opening the Sesame2 native store repository.
 *
 * @throws TexaiException if the repository cannot be initialized
 */
public void initialize() {
  // the caches need to be initialized only once
  CacheInitializer.initializeCaches();
  // optional: turns on assertions by default for classes subsequently loaded by this class loader
  getClass().getClassLoader().setDefaultAssertionStatus(true);
  // An alternative repository location is
  // System.getProperties().getProperty("user.home") + "/.aduna/openrdf-sesame/repositories/WordNet21Domain".
  final File dataDirectory = new File("/mnt/tmpfs/repositories/WordNet21Domain");
  LOGGER.info("accessing the Sesame2 repository in " + dataDirectory.toString());
  final String indices = "spoc,posc";
  repository = new SailRepository(new NativeStore(dataDirectory, indices));
  try {
    repository.initialize();
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  }
}
/** Validates the WordNet RDF entities, one entity type after another in the fixed order given below. */
public void validate() {
  // the synsets and the word senses that belong to them
  this.validateWordNetSynsets();
  this.validateWordNetWordSenses();
  // the words and categories
  this.validateWordNetCasedEnglishWords();
  this.validateWordNetCategories();
  this.validateWordNetEnglishWords();
  this.validateWordNetEnglishWordMorphs();
  // the sample phrase items and sentence patterns
  this.validateWordNetSamplePhraseItems();
  this.validateWordNetSentencePatterns();
}
/** Validates the WordNet synsets.
 *
 * Queries the repository for the URI of every WordNet synset, then hands the URIs to NBR_THREADS
 * validation runnables through a shared iterator and waits until all of them have signalled completion.
 *
 * @throws TexaiException if the query fails, if no synset URIs are found, or if the calling thread is
 * interrupted while waiting for the validation runnables
 */
private void validateWordNetSynsets() {
  LOGGER.info("Querying the WordNet synset URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
              "SELECT s FROM {s} rdf:type {<http://sw.cyc.com/2006/07/27/cyc/WordNetSynset>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetSynsetURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final OpenRDFException ex) {
    // MalformedQueryException and RepositoryException are OpenRDFExceptions and are wrapped the same way
    throw new TexaiException(ex);
  }
  if (wordNetSynsetURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet synset URIs selected");
  }

  final int nbrSynsetURIs = wordNetSynsetURIsToValidate.size();
  LOGGER.info("Found " + nbrSynsetURIs + " WordNet synset URIs");
  // the iterator must be assigned before the runnables are submitted because they all draw from it
  wordNetSynsetURIsToValidate_iter = wordNetSynsetURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetSynsetValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // restore the interrupt status so that callers can still observe the interruption
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the validation completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrSynsetURIs + " at the rate of " + nbrSynsetURIs / secondsDuration + " per second");
  LOGGER.info("all WordNet synset validation activities completed");
}
/** Takes the next WordNet synset URI from the shared iterator while holding the iterator's lock.
 *
 * @return the next WordNet synset URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetSynsetURIToValidate() {
  synchronized (wordNetSynsetURIsToValidate_iter) {
    return wordNetSynsetURIsToValidate_iter.hasNext()
            ? wordNetSynsetURIsToValidate_iter.next()
            : null;
  }
}
/** A parallel runnable that validates WordNet synset URIs which it obtains from the shared iterator. */
@Immutable
class WordNetSynsetValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetSynsetValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetSynsetValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates synset URIs until the shared iterator is exhausted, then counts down the latch.
   *
   * The latch is counted down in a finally block, so the thread waiting on it is released even when
   * validation ends with an exception or an assertion error.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetSynsetURIToValidate();
      while (uri != null) {
        validateWordNetSynset(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetSynsetURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet synset URIs");
    } catch (final Exception ex) {
      // the logger records the stack trace
      LOGGER.error(ex.getMessage(), ex);
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // always release the waiting thread, even if closing the entity manager fails
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet synset.
 *
 * A synset that was already validated is skipped. Otherwise the synset is loaded and its id, mapped
 * terms, synset id, speech part, category, gloss, word senses and sample phrase items are checked.
 * Every related synset must have a URI that is among the synset URIs to validate, and for the
 * hypernym/hyponym, holonym/meronym relationships the inverse link is also checked. Problems are
 * logged as warnings; nothing is thrown for invalid data.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet synset to validate
 */
private void validateWordNetSynset(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert rdfEntityManager != null : "rdfEntityManager must not be null";
  assert id != null : "id must not be null";

  boolean isSynsetLogged = false;
  synchronized (validatedWordNetSynsets) {
    if (!validatedWordNetSynsets.add(id)) {
      // already validated
      return;
    }
    nbrOfValidatedWordNetSynsets++;
    if (nbrOfValidatedWordNetSynsets % 2500 == 0) {
      // every 2500th synset is logged as a progress indication and the entity cache is reset
      isSynsetLogged = true;
      CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
    }
  }
  final WordNetSynset wordNetSynset = (WordNetSynset) rdfEntityManager.find(WordNetSynset.class, id);
  if (wordNetSynset == null) {
    // report a synset that cannot be loaded instead of failing below with a NullPointerException
    LOGGER.warn(id + " WordNet synset not found");
    return;
  }
  if (isSynsetLogged) {
    LOGGER.info(wordNetSynset + " by thread " + Thread.currentThread().getName());
  }

  // the id
  if (!wordNetSynset.getId().equals(id)) {
    LOGGER.warn(wordNetSynset + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetSynset.getId());
  }

  // the mapped terms
  for (final URI uri : wordNetSynset.getWNMappedTerms()) {
    if (uri == null) {
      LOGGER.warn("wordNetSynset has a null mapped term " + wordNetSynset.getWNMappedTerms());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug(" mapped term " + uri + " OK for " + wordNetSynset.getId());
    }
  }

  // the synonym set id
  if (wordNetSynset.getWNWordNetSynsetId() <= 0) {
    LOGGER.warn(id + " invalid synset id " + wordNetSynset.getWNWordNetSynsetId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("synset id " + wordNetSynset.getWNWordNetSynsetId() + " OK for " + wordNetSynset);
  }

  // the part of speech
  if (wordNetSynset.getWNSynsetSpeechPart() == null) {
    LOGGER.warn(id + " missing speech part");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("speech part " + wordNetSynset.getWNSynsetSpeechPart() + " OK for " + wordNetSynset);
  }

  // the WordNet category, which is tested for null before it is dereferenced
  if (wordNetSynset.getWNCategory() == null) {
    LOGGER.warn(id + " missing WN category");
  } else {
    wordNetSynset.getWNCategory().toString(); // invoke the lazy loader
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("category " + wordNetSynset.getWNCategory() + " OK for " + wordNetSynset);
    }
  }

  // the concept definition
  if (wordNetSynset.getWNSynsetGloss() == null || wordNetSynset.getWNSynsetGloss().isEmpty()) {
    LOGGER.warn(id + " missing gloss");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("gloss " + wordNetSynset.getWNSynsetGloss() + " OK for " + wordNetSynset);
  }

  // the word senses
  for (final WordNetWordSense wordNetWordSense : wordNetSynset.getWNWordSenses()) {
    if (!wordNetSynset.equals(wordNetWordSense.getWordNetSynset())) {
      LOGGER.warn("word sense " + wordNetWordSense.getId() + " " + wordNetWordSense.getWordNetEnglishWord().getWNLemma()
              + " does not have this " + wordNetSynset.getId() + " as a synset "
              + wordNetSynset.description());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("word sense " + wordNetWordSense + " OK for " + wordNetSynset);
    }
  }

  // the sample phrase items
  for (final WordNetSamplePhraseItem wordNetSamplePhraseItem : wordNetSynset.getWNSynsetSamplePhraseItems()) {
    wordNetSamplePhraseItem.toString(); // invoke the lazy loader
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("sample phrase item " + wordNetSamplePhraseItem + " OK for " + wordNetSynset);
    }
  }

  // the relationships that have an inverse, each iterated from its own getter
  for (final WordNetSynset hypernym : wordNetSynset.getWNHypernyms()) {
    checkInverseSynsetLink(wordNetSynset, hypernym, "hypernym", "a hyponym",
            hypernym.getWNHyponyms().contains(wordNetSynset));
  }
  for (final WordNetSynset hyponym : wordNetSynset.getWNHyponyms()) {
    checkInverseSynsetLink(wordNetSynset, hyponym, "hyponym", "a hypernym",
            hyponym.getWNHypernyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnInstanceHypernym : wordNetSynset.getWNInstanceHypernyms()) {
    checkInverseSynsetLink(wordNetSynset, wnInstanceHypernym, "instance hypernym", "an instance hyponym",
            wnInstanceHypernym.getWNInstanceHyponyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnInstanceHyponym : wordNetSynset.getWNInstanceHyponyms()) {
    checkInverseSynsetLink(wordNetSynset, wnInstanceHyponym, "instance hyponym", "an instance hypernym",
            wnInstanceHyponym.getWNInstanceHypernyms().contains(wordNetSynset));
  }
  // the "an part ..." wording of the two part relationship messages is kept as originally logged
  for (final WordNetSynset wnPartHolonym : wordNetSynset.getWNPartHolonyms()) {
    checkInverseSynsetLink(wordNetSynset, wnPartHolonym, "part holonym", "an part meronym",
            wnPartHolonym.getWNPartMeronyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnPartMeronym : wordNetSynset.getWNPartMeronyms()) {
    checkInverseSynsetLink(wordNetSynset, wnPartMeronym, "part meronym", "an part holonym",
            wnPartMeronym.getWNPartHolonyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnMemberHolonym : wordNetSynset.getWNMemberHolonyms()) {
    checkInverseSynsetLink(wordNetSynset, wnMemberHolonym, "member holonym", "a member meronym",
            wnMemberHolonym.getWNMemberMeronyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnMemberMeronym : wordNetSynset.getWNMemberMeronyms()) {
    checkInverseSynsetLink(wordNetSynset, wnMemberMeronym, "member meronym", "a member holonym",
            wnMemberMeronym.getWNMemberHolonyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnSubstanceHolonym : wordNetSynset.getWNSubstanceHolonyms()) {
    checkInverseSynsetLink(wordNetSynset, wnSubstanceHolonym, "substance holonym", "a substance meronym",
            wnSubstanceHolonym.getWNSubstanceMeronyms().contains(wordNetSynset));
  }
  for (final WordNetSynset wnSubstanceMeronym : wordNetSynset.getWNSubstanceMeronyms()) {
    checkInverseSynsetLink(wordNetSynset, wnSubstanceMeronym, "substance meronym", "a substance holonym",
            wnSubstanceMeronym.getWNSubstanceHolonyms().contains(wordNetSynset));
  }

  // the relationships for which only the URI of the related synset is checked
  checkRelatedSynsetIds("entailment", wordNetSynset.getWNEntailments());
  checkRelatedSynsetIds("cause", wordNetSynset.getWNCauses());
  // the antonyms (none for WordNet synsets)
  checkRelatedSynsetIds("antonym", wordNetSynset.getWNAntonyms());
  checkRelatedSynsetIds("similar-to", wordNetSynset.getWNSimilarTos());
  checkRelatedSynsetIds("also-see", wordNetSynset.getWNAlsoSees());
  checkRelatedSynsetIds("attribute", wordNetSynset.getWNAttributes());
  checkRelatedSynsetIds("verb group", wordNetSynset.getWNVerbGroups());
  checkRelatedSynsetIds("domain category", wordNetSynset.getWNCategoryDomains());
  checkRelatedSynsetIds("domain member category", wordNetSynset.getWNCategoryDomainMembers());
  checkRelatedSynsetIds("domain region", wordNetSynset.getWNRegionDomains());
  checkRelatedSynsetIds("domain member region", wordNetSynset.getWNRegionDomainMembers());
  checkRelatedSynsetIds("domain usage", wordNetSynset.getWNUsageDomains());
  checkRelatedSynsetIds("domain member usage", wordNetSynset.getWNUsageDomainMembers());
  // the domains (none present)
  checkRelatedSynsetIds("domain", wordNetSynset.getWNDomains());
  // the members (none present)
  checkRelatedSynsetIds("member", wordNetSynset.getWNMembers());
}

/** Checks one related synset of a relationship that has an inverse, and logs the outcome.
 *
 * @param wordNetSynset the WordNet synset being validated
 * @param relatedSynset the synset that is related to the synset being validated
 * @param relationName the name of the relationship for log messages, e.g. "hypernym"
 * @param inverseRelationPhrase the inverse relationship with its article for log messages, e.g. "a hyponym"
 * @param hasInverseLink whether the related synset links back to the synset being validated
 */
private void checkInverseSynsetLink(
        final WordNetSynset wordNetSynset,
        final WordNetSynset relatedSynset,
        final String relationName,
        final String inverseRelationPhrase,
        final boolean hasInverseLink) {
  if (!wordNetSynsetURIsToValidate.contains(relatedSynset.getId())) {
    LOGGER.warn("invalid id for " + relationName + " " + relatedSynset.getId());
  }
  if (!hasInverseLink) {
    LOGGER.warn(relationName + " " + relatedSynset.getId() + " does not have this " + wordNetSynset.getId()
            + " as " + inverseRelationPhrase);
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug(" " + relationName + " " + relatedSynset + " inverse link OK");
  }
}

/** Checks that the URI of each related synset is among the synset URIs to validate, and logs the outcome.
 *
 * @param relationName the name of the relationship for log messages, e.g. "entailment"
 * @param relatedSynsets the synsets that are related to the synset being validated
 */
private void checkRelatedSynsetIds(final String relationName, final Iterable<? extends WordNetSynset> relatedSynsets) {
  for (final WordNetSynset relatedSynset : relatedSynsets) {
    if (!wordNetSynsetURIsToValidate.contains(relatedSynset.getId())) {
      LOGGER.warn("invalid id for " + relationName + " " + relatedSynset.getId());
    }
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug(" " + relationName + " " + relatedSynset + " OK");
    }
  }
}
/** Validates the WordNet word senses.
 *
 * Queries the repository for the URI of every WordNet word sense, then hands the URIs to NBR_THREADS
 * validation runnables through a shared iterator and waits until all of them have signalled completion.
 *
 * @throws TexaiException if the query fails, if no word sense URIs are found, or if the calling thread
 * is interrupted while waiting for the validation runnables
 */
private void validateWordNetWordSenses() {
  LOGGER.info("Querying the WordNet word sense URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
              "SELECT s FROM {s} rdf:type {<http://sw.cyc.com/2006/07/27/cyc/WordNetWordSense>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetWordSenseURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final OpenRDFException ex) {
    // MalformedQueryException and RepositoryException are OpenRDFExceptions and are wrapped the same way
    throw new TexaiException(ex);
  }
  if (wordNetWordSenseURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet word sense URIs selected");
  }

  final int nbrWordSenseURIs = wordNetWordSenseURIsToValidate.size();
  LOGGER.info("Found " + nbrWordSenseURIs + " WordNet word sense URIs");
  // the iterator must be assigned before the runnables are submitted because they all draw from it
  wordNetWordSenseURIsToValidate_iter = wordNetWordSenseURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetWordSenseValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // restore the interrupt status so that callers can still observe the interruption
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the validation completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrWordSenseURIs + " at the rate of " + nbrWordSenseURIs / secondsDuration + " per second");
  LOGGER.info("all WordNet word sense validation activities completed");
}
/** Takes the next WordNet word sense URI from the shared iterator while holding the iterator's lock.
 *
 * @return the next WordNet word sense URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetWordSenseURIToValidate() {
  synchronized (wordNetWordSenseURIsToValidate_iter) {
    return wordNetWordSenseURIsToValidate_iter.hasNext()
            ? wordNetWordSenseURIsToValidate_iter.next()
            : null;
  }
}
/** A parallel runnable that validates WordNet word sense URIs which it obtains from the shared iterator. */
@Immutable
class WordNetWordSenseValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetWordSenseValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetWordSenseValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates word sense URIs until the shared iterator is exhausted, then counts down the latch.
   *
   * The latch is counted down in a finally block, so the thread waiting on it is released even when
   * validation ends with an exception or an assertion error.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      Thread.currentThread().setName("word sense " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetWordSenseURIToValidate();
      while (uri != null) {
        validateWordNetWordSense(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetWordSenseURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet word sense URIs");
    } catch (final Exception ex) {
      // the logger records the stack trace
      LOGGER.error(ex.getMessage(), ex);
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // always release the waiting thread, even if closing the entity manager fails
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet word sense.
*
* @param rdfEntityManager the RDF entity manager
* @param id the URI of the WordNet word sense to validate
*/
private void validateWordNetWordSense(final RDFEntityManager rdfEntityManager, final URI id) {
//Preconditions
assert rdfEntityManager != null : "rdfEntityManager must not be null";
assert id != null : "id must not be null";
boolean isWordSenseLogged = false;
synchronized (validatedWordNetWordSenses) {
if (validatedWordNetWordSenses.contains(id)) {
return;
} else {
validatedWordNetWordSenses.add(id);
nbrOfValidatedWordNetWordSenses++;
if (nbrOfValidatedWordNetWordSenses % 2500 == 0) {
isWordSenseLogged = true;
CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
}
}
}
final WordNetWordSense wordNetWordSense = (WordNetWordSense) rdfEntityManager.find(WordNetWordSense.class, id);
if (isWordSenseLogged) {
LOGGER.info(wordNetWordSense.description() + " by thread " + Thread.currentThread().getName());
}
// the id
if (!wordNetWordSense.getId().equals(id)) {
LOGGER.warn(wordNetWordSense + " ids are not equal");
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("id OK for " + wordNetWordSense.getId());
}
// the synset
if (!wordNetWordSense.getWordNetSynset().getWNWordSenses().contains(wordNetWordSense)) {
LOGGER.warn("synset " + wordNetWordSense.getWordNetSynset().getId() + " " + wordNetWordSense.getWordNetSynset().description()
+ " does not have this " + wordNetWordSense.getId()
+ " " + wordNetWordSense.description() + " as a word sense");
}
// the position of this word sense object within the list of synonymous word senses
if (wordNetWordSense.getWNWordSenseSynsetPosition() <= 0) {
LOGGER.warn(id + " invalid word sense synset position in " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("word sense synset position " + wordNetWordSense.getWNWordSenseSynsetPosition() + " OK for " + wordNetWordSense.getId());
}
// the word for which this object is a meaning sense
if (!wordNetWordSense.getWordNetEnglishWord().getWNWordSenses().contains(wordNetWordSense)) {
LOGGER.warn("English word " + wordNetWordSense.getWordNetEnglishWord().getId()
+ " " + wordNetWordSense.getWordNetEnglishWord().getWNLemma()
+ " does not have this " + wordNetWordSense.getId()
+ " " + wordNetWordSense.description() + " as a word sense");
LOGGER.info(" WordNet word sense " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("English word " +wordNetWordSense.getWordNetEnglishWord().getWNLemma() + " OK for " + wordNetWordSense.getId());
}
// the cased word for which this object is a meaning sense, or null if not present
if (wordNetWordSense.getWordNetCasedEnglishWord() != null) {
if (!wordNetWordSense.getWordNetCasedEnglishWord().getWNWordSenses().contains(wordNetWordSense)) {
LOGGER.warn("cased English word " + wordNetWordSense.getWordNetCasedEnglishWord().getId()
+ " " + wordNetWordSense.getWordNetCasedEnglishWord().getWNLemma()
+ " does not have this " + wordNetWordSense.getId()
+ " " + wordNetWordSense.description() + " as a word sense");
LOGGER.info(" WordNet word sense " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("cased English word " +wordNetWordSense.getWordNetCasedEnglishWord().getWNLemma() + " OK for " + wordNetWordSense.getId());
}
}
// the rank of the word sense
if (wordNetWordSense.getWNWordSenseFrequencyOfUsageRank() <= 0) {
LOGGER.warn(id + " invalid usage rank in " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("word sense usage rank " + wordNetWordSense.getWNWordSenseFrequencyOfUsageRank() + " OK for " + wordNetWordSense.getId());
}
// the lexicographer id
if (wordNetWordSense.getWNWordSenseLexicographerId() == null || wordNetWordSense.getWNWordSenseLexicographerId().isEmpty()) {
LOGGER.warn(id + " invalid lexicographer id in " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("word sense lexicographer id " + wordNetWordSense.getWNWordSenseLexicographerId() + " OK for " + wordNetWordSense.getId());
}
// the number of times the sense is tagged in various semantic concordance texts
if (wordNetWordSense.getWNWordSenseTagCount() < 0) {
LOGGER.warn(id + " invalid tag count in " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("word sense tag count " + wordNetWordSense.getWNWordSenseTagCount() + " OK for " + wordNetWordSense.getId());
}
// the possibly empty set of verb frames in case this is a verb word sense
for (final URI verbFrameURI : wordNetWordSense.getWNWordSenseVerbFrames()) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("verb frame URI " + verbFrameURI + " OK for " + wordNetWordSense.getId());
}
}
// the possibly empty set of sentence patterns
for (final WordNetSentencePattern wordNetSentencePattern : wordNetWordSense.getWNSentencePatterns()) {
if (!wordNetSentencePattern.getWNWordSenses().contains(wordNetWordSense)) {
LOGGER.warn("sentence pattern " + wordNetSentencePattern.getId()
+ " does not have this " + wordNetWordSense.getId()
+ " " + wordNetWordSense.description() + " as a word sense");
LOGGER.info(" WordNet word sense " + wordNetWordSense.getId());
} else if (LOGGER.isDebugEnabled()) {
LOGGER.debug("sentence pattern " + wordNetSentencePattern + " OK for " + wordNetWordSense.getId());
}
}
// the antonyms
for (final WordNetWordSense wnAntonym : wordNetWordSense.getWNAntonyms()) {
wnAntonym.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" antonym " + wnAntonym + " OK");
}
}
// the also-sees
for (final WordNetWordSense wnAlsoSee : wordNetWordSense.getWNAlsoSees()) {
wnAlsoSee.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" also-see " + wnAlsoSee + " OK");
}
}
// the participles
for (final WordNetWordSense wnParticipleOfVerb : wordNetWordSense.getWNParticipleOfVerbs()) {
wnParticipleOfVerb.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" participle " + wnParticipleOfVerb + " OK");
}
}
// the pertainyms
for (final WordNetWordSense wnPertainym : wordNetWordSense.getWNPertainyms()) {
wnPertainym.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" pertainym " + wnPertainym + " OK");
}
}
// the derivations
for (final WordNetWordSense wnDerivedFromAdjective : wordNetWordSense.getWNDerivedFromAdjectives()) {
wnDerivedFromAdjective.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" derived from adjective " + wnDerivedFromAdjective + " OK");
}
}
// the domain categories
for (final WordNetWordSense wnCategoryDomain : wordNetWordSense.getWNCategoryDomains()) {
wnCategoryDomain.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" category domain " + wnCategoryDomain + " OK");
}
}
// the domain member categories
for (final WordNetWordSense wnCategoryDomainMember : wordNetWordSense.getWNCategoryDomainMembers()) {
wnCategoryDomainMember.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" category domain member " + wnCategoryDomainMember + " OK");
}
}
// the domain regions
for (final WordNetWordSense wnRegionDomain : wordNetWordSense.getWNRegionDomains()) {
wnRegionDomain.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" domain region " + wnRegionDomain + " OK");
}
}
// the domain member regions
for (final WordNetWordSense wnRegionDomainMember : wordNetWordSense.getWNRegionDomainMembers()) {
wnRegionDomainMember.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" domain region member " + wnRegionDomainMember + " OK");
}
}
// the domain usages
for (final WordNetWordSense wnUsageDomain : wordNetWordSense.getWNUsageDomains()) {
wnUsageDomain.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" usage domain " + wnUsageDomain + " OK");
}
}
// the domain member usages
for (final WordNetWordSense wnUsageDomainMember : wordNetWordSense.getWNUsageDomainMembers()) {
wnUsageDomainMember.toString(); // lazy load it
if (LOGGER.isDebugEnabled()) {
LOGGER.debug(" usage domain member " + wnUsageDomainMember + " OK");
}
}
}
/** Validates the WordNet cased English words.
 *
 * <p>Queries the repository for every subject whose rdf:type is WordNetCasedEnglishWord, adds the resulting URIs
 * to wordNetCasedEnglishWordURIsToValidate, starts NBR_THREADS validation runnables on the executor, and blocks
 * until the shared latch reaches zero.  The elapsed time and throughput are then logged.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the wait is interrupted
 */
private void validateWordNetCasedEnglishWords() {
  LOGGER.info("Querying the WordNet cased English word URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
          "SELECT s FROM {s} rdf:type {<http://texai.org/texai/org.texai.wordnet.domain.entity.WordNetCasedEnglishWord>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetCasedEnglishWordURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating over it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when preparing or evaluating the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  if (wordNetCasedEnglishWordURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet cased English word URIs selected");
  }

  final int nbrURIsToValidate = wordNetCasedEnglishWordURIsToValidate.size();
  LOGGER.info("Found " + nbrURIsToValidate + " WordNet cased English word URIs");
  // the worker threads draw their URIs from this shared iterator
  wordNetCasedEnglishWordURIsToValidate_iter = wordNetCasedEnglishWordURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetCasedEnglishWordValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // preserve the interrupt status for code further up the call stack
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrURIsToValidate + " at the rate of " + nbrURIsToValidate / secondsDuration + " per second");
  LOGGER.info("all WordNet cased English word validation activities completed");
}
/** Hands out the next WordNet cased English word URI awaiting validation.
 *
 * <p>Access to the shared iterator is serialized by locking on the iterator itself, so that the
 * hasNext/next pair is executed by one thread at a time.
 *
 * @return the next WordNet cased English word URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetCasedEnglishWordURIToValidate() {
  synchronized (wordNetCasedEnglishWordURIsToValidate_iter) {
    return wordNetCasedEnglishWordURIsToValidate_iter.hasNext()
        ? wordNetCasedEnglishWordURIsToValidate_iter.next()
        : null;
  }
}
/** A parallel runnable that validates WordNet cased English word URIs which it obtains from the shared iterator. */
@Immutable
class WordNetCasedEnglishWordValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetCasedEnglishWordValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetCasedEnglishWordValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates WordNet cased English word URIs until the shared iterator is exhausted.
   *
   * <p>Any exception is logged and ends this worker.  The latch is always counted down, whether the worker
   * finishes normally or fails, so the thread waiting on the latch cannot block forever.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" thread name looks copied from the synset validation - confirm
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetCasedEnglishWordURIToValidate();
      while (uri != null) {
        validateWordNetCasedEnglishWord(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetCasedEnglishWordURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet cased English word URIs");
    } catch (final Exception ex) {
      LOGGER.error(ex.getMessage(), ex);
      ex.printStackTrace();
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // signal completion even after a failure, otherwise the caller waits on the latch indefinitely
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet cased English word.
 *
 * <p>Loads the entity and checks its id, word id, lemma, and that each of its word senses refers back to it.
 * Problems are logged as warnings.  On every 2500th validation the connected RDF entities cache is reset
 * and the loaded entity is logged at the info level.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet cased English word to validate
 */
private void validateWordNetCasedEnglishWord(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert id != null : "id must not be null";

  boolean isCasedEnglishWordLogged = false;
  // the shared counter is guarded by the lock on the URI set
  synchronized (wordNetCasedEnglishWordURIsToValidate) {
    nbrOfValidatedWordNetCasedEnglishWords++;
    if (nbrOfValidatedWordNetCasedEnglishWords % 2500 == 0) {
      isCasedEnglishWordLogged = true;
      CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
    }
  }
  final WordNetCasedEnglishWord wordNetCasedEnglishWord = (WordNetCasedEnglishWord) rdfEntityManager.find(WordNetCasedEnglishWord.class, id);
  if (wordNetCasedEnglishWord == null) {
    // NOTE(review): assumes find() can return null for an entity that cannot be loaded - confirm
    LOGGER.warn(id + " could not be loaded as a WordNet cased English word");
    return;
  }
  if (isCasedEnglishWordLogged) {
    LOGGER.info(wordNetCasedEnglishWord + " by thread " + Thread.currentThread().getName());
  }

  // the id
  if (!wordNetCasedEnglishWord.getId().equals(id)) {
    LOGGER.warn(wordNetCasedEnglishWord + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetCasedEnglishWord.getId());
  }

  // the word id
  if (wordNetCasedEnglishWord.getWNCasedWordId() <= 0) {
    LOGGER.warn(id + " invalid word id in " + wordNetCasedEnglishWord.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("word id " + wordNetCasedEnglishWord.getWNCasedWordId() + " OK for " + wordNetCasedEnglishWord.getId());
  }

  // the lemma
  if (wordNetCasedEnglishWord.getWNLemma() == null || wordNetCasedEnglishWord.getWNLemma().isEmpty()) {
    LOGGER.warn(id + " invalid lemma in " + wordNetCasedEnglishWord.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("lemma " + wordNetCasedEnglishWord.getWNLemma() + " OK for " + wordNetCasedEnglishWord.getId());
  }

  // the word senses, each of which must refer back to this cased English word
  for (final WordNetWordSense wordNetWordSense : wordNetCasedEnglishWord.getWNWordSenses()) {
    // a missing back-reference is reported as invalid rather than raising a NullPointerException
    if (wordNetWordSense.getWordNetCasedEnglishWord() == null
        || !wordNetWordSense.getWordNetCasedEnglishWord().equals(wordNetCasedEnglishWord)) {
      LOGGER.warn(id + " invalid wordsense in " + wordNetCasedEnglishWord.getId());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("word sense " + wordNetWordSense.description() + " OK for " + wordNetCasedEnglishWord.getId());
    }
  }
}
/** Validates the WordNet categories.
 *
 * <p>Queries the repository for every subject whose rdf:type is WordNetCategory, adds the resulting URIs
 * to wordNetCategoryURIsToValidate, starts NBR_THREADS validation runnables on the executor, and blocks
 * until the shared latch reaches zero.  The elapsed time and throughput are then logged.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the wait is interrupted
 */
private void validateWordNetCategories() {
  LOGGER.info("Querying the WordNet category URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
          "SELECT s FROM {s} rdf:type {<http://texai.org/texai/org.texai.wordnet.domain.entity.WordNetCategory>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetCategoryURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating over it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when preparing or evaluating the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  if (wordNetCategoryURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet category URIs selected");
  }

  final int nbrURIsToValidate = wordNetCategoryURIsToValidate.size();
  LOGGER.info("Found " + nbrURIsToValidate + " WordNet category URIs");
  // the worker threads draw their URIs from this shared iterator
  wordNetCategoryURIsToValidate_iter = wordNetCategoryURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetCategoryValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // preserve the interrupt status for code further up the call stack
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrURIsToValidate + " at the rate of " + nbrURIsToValidate / secondsDuration + " per second");
  LOGGER.info("all WordNet category validation activities completed");
}
/** Hands out the next WordNet category URI awaiting validation.
 *
 * <p>Access to the shared iterator is serialized by locking on the iterator itself, so that the
 * hasNext/next pair is executed by one thread at a time.
 *
 * @return the next WordNet category URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetCategoryURIToValidate() {
  synchronized (wordNetCategoryURIsToValidate_iter) {
    return wordNetCategoryURIsToValidate_iter.hasNext()
        ? wordNetCategoryURIsToValidate_iter.next()
        : null;
  }
}
/** A parallel runnable that validates WordNet category URIs which it obtains from the shared iterator. */
@Immutable
class WordNetCategoryValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetCategoryValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetCategoryValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates WordNet category URIs until the shared iterator is exhausted.
   *
   * <p>Any exception is logged and ends this worker.  The latch is always counted down, whether the worker
   * finishes normally or fails, so the thread waiting on the latch cannot block forever.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" thread name looks copied from the synset validation - confirm
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetCategoryURIToValidate();
      while (uri != null) {
        validateWordNetCategory(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetCategoryURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet category URIs");
    } catch (final Exception ex) {
      LOGGER.error(ex.getMessage(), ex);
      ex.printStackTrace();
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // signal completion even after a failure, otherwise the caller waits on the latch indefinitely
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet category.
 *
 * <p>Resets the connected RDF entities cache, loads the category, logs its name at the info level, and
 * checks its id, its name, and that each of its synsets refers back to it.  Problems are logged as warnings.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet category to validate
 */
private void validateWordNetCategory(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert id != null : "id must not be null";

  CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
  final WordNetCategory wordNetCategory = (WordNetCategory) rdfEntityManager.find(WordNetCategory.class, id);
  if (wordNetCategory == null) {
    // NOTE(review): assumes find() can return null for an entity that cannot be loaded - confirm
    LOGGER.warn(id + " could not be loaded as a WordNet category");
    return;
  }
  LOGGER.info(wordNetCategory.getName() + " by thread " + Thread.currentThread().getName());

  // the id
  if (!wordNetCategory.getId().equals(id)) {
    LOGGER.warn(wordNetCategory + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetCategory.getId());
  }

  // the category name
  if (wordNetCategory.getName() == null || wordNetCategory.getName().isEmpty()) {
    LOGGER.warn(id + " invalid name in " + wordNetCategory.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("name " + wordNetCategory.getName() + " OK for " + wordNetCategory.getId());
  }

  // the set of synsets in this category, each of which must refer back to this category
  for (final WordNetSynset wordNetSynset : wordNetCategory.getWNSynsets()) {
    // a missing back-reference is reported as invalid rather than raising a NullPointerException
    if (wordNetSynset.getWNCategory() == null || !wordNetSynset.getWNCategory().equals(wordNetCategory)) {
      LOGGER.warn(id + " invalid category in " + wordNetSynset.getId());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("synset " + wordNetSynset.description() + " OK for " + wordNetCategory.getId());
    }
  }
}
/** Validates the WordNet English words.
 *
 * <p>Queries the repository for every subject whose rdf:type is the Cyc WordNetEnglishWord term, adds the
 * resulting URIs to wordNetEnglishWordURIsToValidate, starts NBR_THREADS validation runnables on the executor,
 * and blocks until the shared latch reaches zero.  The elapsed time and throughput are then logged.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the wait is interrupted
 */
private void validateWordNetEnglishWords() {
  LOGGER.info("Querying the WordNet English word URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
          "SELECT s FROM {s} rdf:type {<http://sw.cyc.com/2006/07/27/cyc/WordNetEnglishWord>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetEnglishWordURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating over it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when preparing or evaluating the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  if (wordNetEnglishWordURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet English word URIs selected");
  }

  final int nbrURIsToValidate = wordNetEnglishWordURIsToValidate.size();
  LOGGER.info("Found " + nbrURIsToValidate + " WordNet English word URIs");
  // the worker threads draw their URIs from this shared iterator
  wordNetEnglishWordURIsToValidate_iter = wordNetEnglishWordURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetEnglishWordValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // preserve the interrupt status for code further up the call stack
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrURIsToValidate + " at the rate of " + nbrURIsToValidate / secondsDuration + " per second");
  LOGGER.info("all WordNet English word validation activities completed");
}
/** Hands out the next WordNet English word URI awaiting validation.
 *
 * <p>Access to the shared iterator is serialized by locking on the iterator itself, so that the
 * hasNext/next pair is executed by one thread at a time.
 *
 * @return the next WordNet English word URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetEnglishWordURIToValidate() {
  synchronized (wordNetEnglishWordURIsToValidate_iter) {
    return wordNetEnglishWordURIsToValidate_iter.hasNext()
        ? wordNetEnglishWordURIsToValidate_iter.next()
        : null;
  }
}
/** A parallel runnable that validates WordNet English word URIs which it obtains from the shared iterator. */
@Immutable
class WordNetEnglishWordValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetEnglishWordValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetEnglishWordValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates WordNet English word URIs until the shared iterator is exhausted.
   *
   * <p>Any exception is logged and ends this worker.  The latch is always counted down, whether the worker
   * finishes normally or fails, so the thread waiting on the latch cannot block forever.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" thread name looks copied from the synset validation - confirm
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetEnglishWordURIToValidate();
      while (uri != null) {
        validateWordNetEnglishWord(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetEnglishWordURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet English word URIs");
    } catch (final Exception ex) {
      LOGGER.error(ex.getMessage(), ex);
      ex.printStackTrace();
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // signal completion even after a failure, otherwise the caller waits on the latch indefinitely
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet English word.
 *
 * <p>Loads the entity and checks its id, word id, lemma, and that each of its word senses and morphological
 * variations refers back to it.  Problems are logged as warnings.  On every 2500th validation the connected
 * RDF entities cache is reset and the loaded entity is logged at the info level.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet English word to validate
 */
private void validateWordNetEnglishWord(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert id != null : "id must not be null";

  boolean isLogged = false;
  // the shared counter is guarded by the lock on the URI set
  synchronized (wordNetEnglishWordURIsToValidate) {
    nbrOfValidatedWordNetEnglishWords++;
    if (nbrOfValidatedWordNetEnglishWords % 2500 == 0) {
      isLogged = true;
      CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
    }
  }
  final WordNetEnglishWord wordNetEnglishWord = (WordNetEnglishWord) rdfEntityManager.find(WordNetEnglishWord.class, id);
  if (wordNetEnglishWord == null) {
    // NOTE(review): assumes find() can return null for an entity that cannot be loaded - confirm
    LOGGER.warn(id + " could not be loaded as a WordNet English word");
    return;
  }
  if (isLogged) {
    LOGGER.info(wordNetEnglishWord + " by thread " + Thread.currentThread().getName());
  }

  // the id
  if (!wordNetEnglishWord.getId().equals(id)) {
    LOGGER.warn(wordNetEnglishWord + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetEnglishWord.getId());
  }

  // the word id
  if (wordNetEnglishWord.getWNWordId() <= 0) {
    LOGGER.warn(id + " invalid word id in " + wordNetEnglishWord.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("word id " + wordNetEnglishWord.getWNWordId() + " OK for " + wordNetEnglishWord.getId());
  }

  // the lemma
  if (wordNetEnglishWord.getWNLemma() == null || wordNetEnglishWord.getWNLemma().isEmpty()) {
    LOGGER.warn(id + " invalid lemma in " + wordNetEnglishWord.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("lemma " + wordNetEnglishWord.getWNLemma() + " OK for " + wordNetEnglishWord.getId());
  }

  // the word senses, each of which must refer back to this English word
  for (final WordNetWordSense wordNetWordSense : wordNetEnglishWord.getWNWordSenses()) {
    // a missing back-reference is reported as invalid rather than raising a NullPointerException
    if (wordNetWordSense.getWordNetEnglishWord() == null
        || !wordNetWordSense.getWordNetEnglishWord().equals(wordNetEnglishWord)) {
      LOGGER.warn(id + " invalid word sense in " + wordNetEnglishWord.getId());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("word sense " + wordNetWordSense.description() + " OK for " + wordNetEnglishWord.getId());
    }
  }

  // the morphological variations, each of which must refer back to this English word
  for (final WordNetEnglishWordMorph wordNetEnglishWordMorph : wordNetEnglishWord.getWNWordMorphs()) {
    // a missing back-reference is reported as invalid rather than raising a NullPointerException
    if (wordNetEnglishWordMorph.getWnWordForMorph() == null
        || !wordNetEnglishWordMorph.getWnWordForMorph().equals(wordNetEnglishWord)) {
      LOGGER.warn(id + " invalid word morph in " + wordNetEnglishWord.getId());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("word morph " + wordNetEnglishWordMorph.getWnMorphSpelling() + " OK for " + wordNetEnglishWord.getId());
    }
  }
}
/** Validates the WordNet English word morphological variations.
 *
 * <p>Queries the repository for every subject whose rdf:type is WordNetEnglishWordMorph, adds the resulting
 * URIs to wordNetEnglishWordMorphURIsToValidate, starts NBR_THREADS validation runnables on the executor, and
 * blocks until the shared latch reaches zero.  The elapsed time and throughput are then logged.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the wait is interrupted
 */
private void validateWordNetEnglishWordMorphs() {
  LOGGER.info("Querying the WordNet English word morp URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
          "SELECT s FROM {s} rdf:type {<http://texai.org/texai/org.texai.wordnet.domain.entity.WordNetEnglishWordMorph>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetEnglishWordMorphURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating over it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when preparing or evaluating the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  if (wordNetEnglishWordMorphURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet English word morp URIs selected");
  }

  final int nbrURIsToValidate = wordNetEnglishWordMorphURIsToValidate.size();
  LOGGER.info("Found " + nbrURIsToValidate + " WordNet English word morp URIs");
  // the worker threads draw their URIs from this shared iterator
  wordNetEnglishWordMorphURIsToValidate_iter = wordNetEnglishWordMorphURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetEnglishWordMorphValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // preserve the interrupt status for code further up the call stack
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrURIsToValidate + " at the rate of " + nbrURIsToValidate / secondsDuration + " per second");
  LOGGER.info("all WordNet English word morp validation activities completed");
}
/** Hands out the next WordNet English word morph URI awaiting validation.
 *
 * <p>Access to the shared iterator is serialized by locking on the iterator itself, so that the
 * hasNext/next pair is executed by one thread at a time.
 *
 * @return the next WordNet English word morph URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetEnglishWordMorphURIToValidate() {
  synchronized (wordNetEnglishWordMorphURIsToValidate_iter) {
    return wordNetEnglishWordMorphURIsToValidate_iter.hasNext()
        ? wordNetEnglishWordMorphURIsToValidate_iter.next()
        : null;
  }
}
/** A parallel runnable that validates WordNet English word morph URIs which it obtains from the shared iterator. */
@Immutable
class WordNetEnglishWordMorphValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetEnglishWordMorphValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetEnglishWordMorphValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates WordNet English word morph URIs until the shared iterator is exhausted.
   *
   * <p>Any exception is logged and ends this worker.  The latch is always counted down, whether the worker
   * finishes normally or fails, so the thread waiting on the latch cannot block forever.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" thread name looks copied from the synset validation - confirm
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetEnglishWordMorphURIToValidate();
      while (uri != null) {
        validateWordNetEnglishWordMorph(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetEnglishWordMorphURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet English word morp URIs");
    } catch (final Exception ex) {
      LOGGER.error(ex.getMessage(), ex);
      ex.printStackTrace();
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // signal completion even after a failure, otherwise the caller waits on the latch indefinitely
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet English word morph.
 *
 * <p>Loads the entity and checks its id, that the English word it belongs to lists it among that word's
 * morphs, its spelling, its part of speech, and its index.  Problems are logged as warnings.  On every 2500th
 * validation the connected RDF entities cache is reset and the loaded entity is logged at the info level.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet English word morph to validate
 */
private void validateWordNetEnglishWordMorph(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert id != null : "id must not be null";

  boolean isLogged = false;
  // the shared counter is guarded by the lock on the URI set
  synchronized (wordNetEnglishWordMorphURIsToValidate) {
    nbrOfValidatedWordNetEnglishWordMorphs++;
    if (nbrOfValidatedWordNetEnglishWordMorphs % 2500 == 0) {
      isLogged = true;
      CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
    }
  }
  final WordNetEnglishWordMorph wordNetEnglishWordMorph = (WordNetEnglishWordMorph) rdfEntityManager.find(WordNetEnglishWordMorph.class, id);
  if (wordNetEnglishWordMorph == null) {
    // NOTE(review): assumes find() can return null for an entity that cannot be loaded - confirm
    LOGGER.warn(id + " could not be loaded as a WordNet English word morph");
    return;
  }
  if (isLogged) {
    LOGGER.info(wordNetEnglishWordMorph + " by thread " + Thread.currentThread().getName());
  }

  // the id
  if (!wordNetEnglishWordMorph.getId().equals(id)) {
    LOGGER.warn(wordNetEnglishWordMorph + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetEnglishWordMorph.getId());
  }

  // the word for which this object is a morphological variation
  if (wordNetEnglishWordMorph.getWnWordForMorph() == null) {
    // a missing owning word is reported rather than raising a NullPointerException
    LOGGER.warn(id + " missing English word in " + wordNetEnglishWordMorph.getId());
  } else if (!wordNetEnglishWordMorph.getWnWordForMorph().getWNWordMorphs().contains(wordNetEnglishWordMorph)) {
    LOGGER.warn(id + " invalid English word morph in " + wordNetEnglishWordMorph.getWnWordForMorph().getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("English word " + wordNetEnglishWordMorph.getWnWordForMorph().getWNLemma() + " OK for " + wordNetEnglishWordMorph.getWnMorphSpelling());
  }

  // the spelling variation
  if (wordNetEnglishWordMorph.getWnMorphSpelling() == null || wordNetEnglishWordMorph.getWnMorphSpelling().isEmpty()) {
    LOGGER.warn(id + " invalid spelling in " + wordNetEnglishWordMorph.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("spelling " + wordNetEnglishWordMorph.getWnMorphSpelling() + " OK for " + wordNetEnglishWordMorph.getId());
  }

  // the part of speech
  if (wordNetEnglishWordMorph.getWnMorphSpeechPart() == null) {
    LOGGER.warn(id + " invalid speech part in " + wordNetEnglishWordMorph.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("speech part " + wordNetEnglishWordMorph.getWnMorphSpeechPart() + " OK for " + wordNetEnglishWordMorph.getWnMorphSpelling());
  }

  // the position of this object in the list of spelling variations for the associated English word
  if (wordNetEnglishWordMorph.getWnMorphIndex() <= 0) {
    LOGGER.warn(id + " invalid index in " + wordNetEnglishWordMorph.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("index " + wordNetEnglishWordMorph.getWnMorphIndex() + " OK for " + wordNetEnglishWordMorph.getId());
  }
}
/** Validates the WordNet sample phrase items.
 *
 * <p>Queries the repository for every subject whose rdf:type is WordNetSamplePhraseItem, adds the resulting
 * URIs to wordNetSamplePhraseItemURIsToValidate, starts NBR_THREADS validation runnables on the executor, and
 * blocks until the shared latch reaches zero.  The elapsed time and throughput are then logged.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the wait is interrupted
 */
private void validateWordNetSamplePhraseItems() {
  LOGGER.info("Querying the WordNet sample phrase item URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
          "SELECT s FROM {s} rdf:type {<http://texai.org/texai/org.texai.wordnet.domain.entity.WordNetSamplePhraseItem>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetSamplePhraseItemURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iterating over it fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when preparing or evaluating the query fails
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  if (wordNetSamplePhraseItemURIsToValidate.isEmpty()) {
    throw new TexaiException("no WordNet sample phrase item URIs selected");
  }

  final int nbrURIsToValidate = wordNetSamplePhraseItemURIsToValidate.size();
  LOGGER.info("Found " + nbrURIsToValidate + " WordNet sample phrase item URIs");
  // the worker threads draw their URIs from this shared iterator
  wordNetSamplePhraseItemURIsToValidate_iter = wordNetSamplePhraseItemURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetSamplePhraseItemValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // preserve the interrupt status for code further up the call stack
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + nbrURIsToValidate + " at the rate of " + nbrURIsToValidate / secondsDuration + " per second");
  LOGGER.info("all WordNet sample phrase item validation activities completed");
}
/** Hands out the next WordNet sample phrase item URI from the shared iterator, holding the iterator's monitor
 * while it is advanced.
 *
 * @return the next WordNet sample phrase item URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetSamplePhraseItemURIToValidate() {
  synchronized (wordNetSamplePhraseItemURIsToValidate_iter) {
    return wordNetSamplePhraseItemURIsToValidate_iter.hasNext()
            ? wordNetSamplePhraseItemURIsToValidate_iter.next()
            : null;
  }
}
/** A parallel runnable that validates WordNet sample phrase item URIs which it obtains from the shared iterator. */
@Immutable
class WordNetSamplePhraseItemValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetSamplePhraseItemValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetSamplePhraseItemValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates URIs taken from the shared iterator until none remain, then closes the RDF entity manager and
   * counts down the latch.  The latch is counted down on every exit path so that the thread waiting on it can
   * never be left blocked by a failure in this runnable.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" prefix looks copied from another validator; kept so log output is unchanged
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetSamplePhraseItemURIToValidate();
      while (uri != null) {
        validateWordNetSamplePhraseItem(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetSamplePhraseItemURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet sample phrase item URIs");
    } catch (final Exception ex) {
      // the logger call records the stack trace, so no separate printStackTrace() is needed
      LOGGER.error(ex.getMessage(), ex);
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // always release the waiting thread, even after an exception, an assertion error or a failed close
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet sample phrase item.
 *
 * Loads the entity for the given URI and checks its id, its back-reference from the owning synset, its numeric
 * sample phrase id and its sample phrase text, logging a warning for each check that fails.  On every 2500th
 * validated item the cache named by Constants.CACHE_CONNECTED_RDF_ENTITIES is reset and the loaded entity is
 * logged at info level.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet sample phrase item to validate
 */
private void validateWordNetSamplePhraseItem(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert rdfEntityManager != null : "rdfEntityManager must not be null";
  assert id != null : "id must not be null";

  boolean isLogged = false;
  // the counter is shared by all validation threads and is guarded by the URI set's monitor
  synchronized (wordNetSamplePhraseItemURIsToValidate) {
    nbrOfValidatedWordNetSamplePhraseItems++;
    if (nbrOfValidatedWordNetSamplePhraseItems % 2500 == 0) {
      isLogged = true;
      CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
    }
  }
  final WordNetSamplePhraseItem wordNetSamplePhraseItem = (WordNetSamplePhraseItem) rdfEntityManager.find(WordNetSamplePhraseItem.class, id);
  if (isLogged) {
    LOGGER.info(wordNetSamplePhraseItem + " by thread " + Thread.currentThread().getName());
  }
  // NOTE(review): assumes find() returns null when nothing is stored for the URI - confirm against RDFEntityManager
  if (wordNetSamplePhraseItem == null) {
    // report the missing entity as a validation failure instead of ending this worker with a NullPointerException
    LOGGER.warn(id + " WordNet sample phrase item not found");
    return;
  }

  // the id
  if (!wordNetSamplePhraseItem.getId().equals(id)) {
    LOGGER.warn(wordNetSamplePhraseItem + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetSamplePhraseItem.getId());
  }

  // the object representing the set of synonymous word senses
  if (wordNetSamplePhraseItem.getWnSamplePhraseItemSynset() == null) {
    // a missing synset is itself a validation failure and must not be dereferenced
    LOGGER.warn(id + " missing synset in " + wordNetSamplePhraseItem.getId());
  } else if (!wordNetSamplePhraseItem.getWnSamplePhraseItemSynset().getWNSynsetSamplePhraseItems().contains(wordNetSamplePhraseItem)) {
    LOGGER.warn(id + " invalid sample phrase in " + wordNetSamplePhraseItem.getWnSamplePhraseItemSynset().getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("synset " + wordNetSamplePhraseItem.getWnSamplePhraseItemSynset().description() + " OK for " + wordNetSamplePhraseItem.getWnSamplePhrase());
  }

  // the sample phrase id
  if (wordNetSamplePhraseItem.getWnWordNetSamplePhraseId() <= 0) {
    LOGGER.warn(id + " invalid id in " + wordNetSamplePhraseItem.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id " + wordNetSamplePhraseItem.getWnWordNetSamplePhraseId() + " OK for " + wordNetSamplePhraseItem.getId());
  }

  // the sample phrase
  if (wordNetSamplePhraseItem.getWnSamplePhrase() == null || wordNetSamplePhraseItem.getWnSamplePhrase().isEmpty()) {
    LOGGER.warn(id + " invalid sample phrase in " + wordNetSamplePhraseItem.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("sample phrase " + wordNetSamplePhraseItem.getWnSamplePhrase() + " OK for " + wordNetSamplePhraseItem.getId());
  }
}
/** Validates the WordNet sentence patterns.
 *
 * Queries the repository for every WordNetSentencePattern URI, adds them to the set of URIs to validate, then
 * submits NBR_THREADS runnables to the executor which draw URIs from a shared iterator.  The calling thread blocks
 * until every runnable has counted down the latch, and then logs the validation rate.
 *
 * @throws TexaiException if the query fails, if no URIs are selected, or if the calling thread is interrupted
 * while waiting for the validation runnables
 */
private void validateWordNetSentencePatterns() {
  LOGGER.info("Querying the WordNet sentence pattern URIs");
  try {
    final RepositoryConnection repositoryConnection = repository.getConnection();
    try {
      final String queryString =
              "SELECT s FROM {s} rdf:type {<http://texai.org/texai/org.texai.wordnet.domain.entity.WordNetSentencePattern>}";
      LOGGER.info("query " + queryString);
      final TupleQuery tupleQuery = repositoryConnection.prepareTupleQuery(QueryLanguage.SERQL, queryString);
      final TupleQueryResult tupleQueryResult = tupleQuery.evaluate();
      try {
        while (tupleQueryResult.hasNext()) {
          wordNetSentencePatternURIsToValidate.add((URI) tupleQueryResult.next().getBinding("s").getValue());
        }
      } finally {
        // release the query result even when iteration fails
        tupleQueryResult.close();
      }
    } finally {
      // release the connection even when the query cannot be prepared or evaluated
      LOGGER.info("closing the query repository connection");
      repositoryConnection.close();
    }
    if (wordNetSentencePatternURIsToValidate.isEmpty()) {
      throw new TexaiException("no WordNet sentence pattern URIs selected");
    }
  } catch (final MalformedQueryException ex) {
    throw new TexaiException(ex);
  } catch (final RepositoryException ex) {
    throw new TexaiException(ex);
  } catch (final OpenRDFException ex) {
    throw new TexaiException(ex);
  }
  final int wordNetSentencePatternURIsToValidate_size = wordNetSentencePatternURIsToValidate.size();
  LOGGER.info("Found " + wordNetSentencePatternURIsToValidate_size + " WordNet sentence pattern URIs");
  wordNetSentencePatternURIsToValidate_iter = wordNetSentencePatternURIsToValidate.iterator();
  final long startMillis = System.currentTimeMillis();
  // one latch count per runnable, so await() returns only after all of them have finished
  final CountDownLatch doneSignal = new CountDownLatch(NBR_THREADS);
  for (int i = 0; i < NBR_THREADS; i++) {
    executor.execute(new WordNetSentencePatternValidationRunnable(doneSignal, i + 1));
  }
  try {
    doneSignal.await();
  } catch (InterruptedException ex) {
    // restore the interrupt status so that code further up the stack can observe it
    Thread.currentThread().interrupt();
    throw new TexaiException(ex);
  }
  // computed directly in double precision; a float intermediate would round long-running durations
  double secondsDuration = (System.currentTimeMillis() - startMillis) / 1000.0d;
  if (secondsDuration == 0) {
    // avoid dividing by zero when the work completes within the clock resolution
    secondsDuration = 1;
  }
  LOGGER.info("Validated " + wordNetSentencePatternURIsToValidate_size + " at the rate of " + wordNetSentencePatternURIsToValidate_size / secondsDuration + " per second");
  LOGGER.info("all WordNet sentence pattern validation activities completed");
}
/** Hands out the next WordNet sentence pattern URI from the shared iterator, holding the iterator's monitor
 * while it is advanced.
 *
 * @return the next WordNet sentence pattern URI to validate, or null when the iterator is exhausted
 */
private URI getNextWordNetSentencePatternURIToValidate() {
  synchronized (wordNetSentencePatternURIsToValidate_iter) {
    return wordNetSentencePatternURIsToValidate_iter.hasNext()
            ? wordNetSentencePatternURIsToValidate_iter.next()
            : null;
  }
}
/** A parallel runnable that validates WordNet sentence pattern URIs which it obtains from the shared iterator. */
@Immutable
class WordNetSentencePatternValidationRunnable implements Runnable {

  /** the count down latch that synchronizes the calling thread */
  private final CountDownLatch doneSignal;
  /** the thread id */
  private final int id;

  /** Constructs a new WordNetSentencePatternValidationRunnable instance.
   *
   * @param doneSignal the count down latch that synchronizes the calling thread
   * @param id the identification for this runnable
   */
  public WordNetSentencePatternValidationRunnable(final CountDownLatch doneSignal, final int id) {
    //Preconditions
    assert doneSignal != null : "doneSignal must not be null";

    this.doneSignal = doneSignal;
    this.id = id;
  }

  /** Validates URIs taken from the shared iterator until none remain, then closes the RDF entity manager and
   * counts down the latch.  The latch is counted down on every exit path so that the thread waiting on it can
   * never be left blocked by a failure in this runnable.
   */
  public void run() {
    RDFEntityManager rdfEntityManager = null;
    try {
      LOGGER.info("starting " + id);
      // NOTE(review): the "synset" prefix looks copied from another validator; kept so log output is unchanged
      Thread.currentThread().setName("synset " + id);
      rdfEntityManager = new RDFEntityManager(repository);
      int nbrURIsProcessed = 0;
      URI uri = getNextWordNetSentencePatternURIToValidate();
      while (uri != null) {
        validateWordNetSentencePattern(rdfEntityManager, uri);
        nbrURIsProcessed++;
        uri = getNextWordNetSentencePatternURIToValidate();
      }
      LOGGER.info("Thread " + id + " completed " + nbrURIsProcessed + " WordNet sentence pattern URIs");
    } catch (final Exception ex) {
      // the logger call records the stack trace, so no separate printStackTrace() is needed
      LOGGER.error(ex.getMessage(), ex);
    } finally {
      try {
        if (rdfEntityManager != null) {
          rdfEntityManager.close();
        }
      } finally {
        // always release the waiting thread, even after an exception, an assertion error or a failed close
        doneSignal.countDown();
      }
    }
  }
}
/** Validates the given WordNet sentence pattern.
 *
 * Resets the cache named by Constants.CACHE_CONNECTED_RDF_ENTITIES, loads the entity for the given URI, logs it at
 * info level, and then checks its id, its numeric sentence pattern id, its pattern string and the back-reference
 * from each of its word senses, logging a warning for each check that fails.
 *
 * @param rdfEntityManager the RDF entity manager
 * @param id the URI of the WordNet sentence pattern to validate
 */
private void validateWordNetSentencePattern(final RDFEntityManager rdfEntityManager, final URI id) {
  //Preconditions
  assert rdfEntityManager != null : "rdfEntityManager must not be null";
  assert id != null : "id must not be null";

  CacheInitializer.resetCache(Constants.CACHE_CONNECTED_RDF_ENTITIES);
  final WordNetSentencePattern wordNetSentencePattern = (WordNetSentencePattern) rdfEntityManager.find(WordNetSentencePattern.class, id);
  LOGGER.info(wordNetSentencePattern + " by thread " + Thread.currentThread().getName());
  // NOTE(review): assumes find() returns null when nothing is stored for the URI - confirm against RDFEntityManager
  if (wordNetSentencePattern == null) {
    // report the missing entity as a validation failure instead of ending this worker with a NullPointerException
    LOGGER.warn(id + " WordNet sentence pattern not found");
    return;
  }

  // the id
  if (!wordNetSentencePattern.getId().equals(id)) {
    LOGGER.warn(wordNetSentencePattern + " ids are not equal");
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id OK for " + wordNetSentencePattern.getId());
  }

  // the sentence pattern id
  if (wordNetSentencePattern.getWNWordNetSentencePatternId() <= 0) {
    LOGGER.warn(id + " invalid id in " + wordNetSentencePattern.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("id " + wordNetSentencePattern.getWNWordNetSentencePatternId() + " OK for " + wordNetSentencePattern.getId());
  }

  // the sentence pattern
  if (wordNetSentencePattern.getWNSentencePatternString() == null || wordNetSentencePattern.getWNSentencePatternString().isEmpty()) {
    LOGGER.warn(id + " invalid sentence pattern in " + wordNetSentencePattern.getId());
  } else if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("sentence pattern " + wordNetSentencePattern.getWNSentencePatternString() + " OK for " + wordNetSentencePattern.getId());
  }

  // the set of word senses, each of which must refer back to this sentence pattern
  for (final WordNetWordSense wordNetWordSense : wordNetSentencePattern.getWNWordSenses()) {
    if (!wordNetWordSense.getWNSentencePatterns().contains(wordNetSentencePattern)) {
      LOGGER.warn(id + " invalid sentence pattern in " + wordNetWordSense.getId());
    } else if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("word sense " + wordNetWordSense.description() + " OK for " + wordNetSentencePattern.getWNSentencePatternString());
    }
  }
}
/** Finalizes this application by shutting down, in order, the executor, the cache manager and the repository.
 *
 * @throws TexaiException if the repository cannot be shut down
 */
public void finalization() {
  executor.shutdown();

  final CacheManager cacheManager = CacheManager.getInstance();
  cacheManager.shutdown();

  try {
    repository.shutDown();
  } catch (final RepositoryException repositoryException) {
    throw new TexaiException(repositoryException);
  }

  LOGGER.info("ValidateWordNetEntities completed");
}
/** Runs the application: initializes it, performs the validation, finalizes it, and then exits the JVM with
 * status 0.
 *
 * @param args the command line arguments (unused)
 */
public static void main(final String[] args) {
  final ValidateWordNetEntities application = new ValidateWordNetEntities();

  application.initialize();
  application.validate();
  application.finalization();

  System.exit(0);
}
}
See more files for this project here