Tuesday, February 14, 2012

THREDDS Crawler Class written in Groovy

Ported from Java:

TdsCrawler.groovy

package util

import java.util.ArrayList
import java.util.List

import javax.naming.Context

import org.apache.log4j.Logger

import thredds.catalog.InvAccess
import thredds.catalog.InvCatalogRef
import thredds.catalog.InvDataset
import thredds.catalog.InvService
import thredds.catalog.ServiceType
import thredds.catalog.crawl.CatalogCrawler

/**
 * Crawls a THREDDS catalog and records the OPeNDAP access URL of the datasets
 * it visits, keyed by dataset ID.
 *
 * Harvested URLs accumulate in one map for the lifetime of the instance; the
 * map is not cleared between crawls, so use a fresh instance per catalog if
 * results must be kept separate.
 */
public class TdsCrawler {
    // Kept as an untyped property for compatibility with existing callers;
    // initialised eagerly so it is usable before crawl() has been called.
    def logger = Logger.getLogger(TdsCrawler.class)

    // dataset ID -> OPeNDAP URL. A later dataset with the same ID (including a
    // null ID) replaces the earlier entry.
    private final Map<String, String> urlOpenDAPMap = new HashMap<String, String>()
    private boolean nestedCrawl = false
    private String topCatalogUrl = ""

    /**
     * Sets the nested flag used by {@link #crawl(String)}.
     * {@link #crawl(String, boolean)} overwrites this value with its own argument.
     *
     * @param nested true to keep datasets regardless of which catalog they were found in
     */
    public void setNested(boolean nested) {
        nestedCrawl = nested
    }

    /**
     * Records the OPeNDAP URL of one dataset, if it has an OPeNDAP access entry.
     *
     * When nestedCrawl is false the dataset is kept only if its catalog URL
     * contains the URL the crawl was started from.
     *
     * @param ids dataset reported by the crawler
     */
    private void doHarvest(InvDataset ids) {
        for (InvAccess a : ids.getAccess()) {
            // Safe navigation: an access entry without a service is skipped
            // instead of raising a NullPointerException.
            if (a.getService()?.getServiceType() != ServiceType.OPENDAP) {
                continue
            }

            String urlString = a.getStandardUrlName()
            String dsCatalogUrl = ids.getCatalogUrl()
            String datasetId = ids.getID()

            // A null catalog URL is treated as "not under the top catalog".
            if (nestedCrawl || dsCatalogUrl?.contains(topCatalogUrl)) {
                urlOpenDAPMap.put(datasetId, urlString)
            }
        }
    }

    /**
     * Crawls the given catalog using the nested flag previously stored with
     * {@link #setNested(boolean)} (false by default).
     *
     * @param catalogUrl URL of the top-level catalog to crawl
     */
    public void crawl(String catalogUrl) {
        crawl(catalogUrl, nestedCrawl)
    }

    /**
     * Crawls the given catalog and harvests OPeNDAP URLs into the map returned
     * by {@link #getOpenDAPUrls()}. Crawler output is written to System.out.
     *
     * @param catalogUrl URL of the top-level catalog to crawl
     * @param nested     true to keep every OPeNDAP dataset visited; false to keep
     *                   only datasets whose catalog URL contains catalogUrl
     */
    public void crawl(String catalogUrl, boolean nested) {
        topCatalogUrl = catalogUrl
        nestedCrawl = nested

        CatalogCrawler.Listener listener = new CatalogCrawler.Listener() {

            // Accept every non-null catalog reference. The reference's URI is
            // deliberately not dereferenced here: nothing consumed it, and
            // calling toASCIIString() on a null URI would abort the crawl.
            public boolean getCatalogRef(InvCatalogRef catRef, Object obj) {
                return catRef != null
            }

            public void getDataset(InvDataset ids, Object crawler) {
                doHarvest(ids)
            }
        }

        CatalogCrawler crawler = new CatalogCrawler(
                CatalogCrawler.USE_ALL, false, listener)

        // No cancel task and no listener context object are supplied.
        crawler.crawl(catalogUrl, null, System.out, null)
    }

    /**
     * Returns the harvested URLs.
     *
     * @return the live internal map of dataset ID to OPeNDAP URL (not a copy)
     */
    public Map getOpenDAPUrls() {
        return urlOpenDAPMap
    }
}


Test class: TdsCrawlerTest.groovy

package util

import static org.junit.Assert.*
import org.junit.Test
import org.apache.log4j.Logger
import services.ServicesMonitor

/**
 * Integration test for TdsCrawler.
 *
 * Crawls a live THREDDS catalog, so it needs network access to the endpoint
 * hard-coded in crawlTest().
 */
class TdsCrawlerTest {
    def logger = Logger.getLogger(TdsCrawlerTest.class)

    /**
     * Crawls the catalog without nesting and expects at least one OPeNDAP URL
     * to be harvested.
     */
    @Test
    public void crawlTest() {
        def tdsEndPoint = "http://www.ngdc.noaa.gov/thredds/bathyCatalog.xml"

        def tds = new TdsCrawler()
        logger.debug "tdsEndPoint=" + tdsEndPoint
        tds.crawl(tdsEndPoint, false)

        def openDapEndPts = tds.getOpenDAPUrls()
        println "odpSize=" + openDapEndPts.size()
        println "Loop thru map:"
        // Print each harvested entry so a failing run shows what was found.
        openDapEndPts.each { id, url ->
            println "${id} -> ${url}"
        }

        assertTrue("No OPeNDAP URLs harvested from " + tdsEndPoint,
                openDapEndPts.size() > 0)
    }
}

No comments: