• Jump To … +
    server.coffee src/actionknob.coffee src/autosem.coffee src/bitbucket_kba.coffee src/browserlog.coffee src/datareduction.coffee src/dci.coffee src/dciknob.coffee src/deeseeeye.coffee src/dnd.coffee src/doof.coffee src/formurla-mngr.coffee src/fractalpanel.coffee src/fractalpanel_test.coffee src/front.coffee src/ingestor.coffee src/kbabitbucket.coffee src/knobctrl.coffee src/lib_test.coffee src/nanoclock.coffee src/noodb.coffee src/noodbabstract.coffee src/noodbbrowser.coffee src/noodbbrowser_test.coffee src/noodbsec.coffee src/noorauth.coffee src/noorplugin.coffee src/noorquery.coffee src/noorvm.coffee src/noorwrite.coffee src/quadparser.coffee src/quadparsern3.coffee src/rbac.coffee src/reactor.coffee src/rebase.coffee src/rsrcidx.coffee src/sandboxactions.coffee src/screen_ctx.coffee src/spogi.coffee src/tabular_widget.coffee src/visctrl.coffee src/voicesknob.coffee src/whowhen.coffee src/xsd2native.coffee
  • ingestor.coffee

  • ¶
    fs = require("fs")
    path = require("path")
    url = require("url")
    
    N3 = require("n3")
    N3Util = N3.Util
    
    Log = require("log")
    LineByLineReader = require("line-by-line")
    WhoWhen = require("./whowhen").WhoWhen
    parseQuadLine = require("./quadparser").parseQuadLine
    parseQuadLineToQuint = require("./quadparser").parseQuadLineToQuint
    parseN5LineToQuint = require("./quadparser").parseN5LineToQuint
    
    getAnyLiteralValue = (s) ->
      # Return the value of the literal `s` via N3Util.getLiteralValue.
      # Handles both the single-double-quoted and the triple-double-quoted
      # forms which come out of N3: a triple-quoted literal is first reduced
      # to the single-quoted form, keeping whatever follows the closing quotes.
      lastTripleIdx = s.lastIndexOf('"""')
      isTripleQuoted = s.length > 5 and lastTripleIdx > 2 and s.slice(0, 3) is '"""'
      if isTripleQuoted
        # drop two of the three quotes on each side, ie leave the outer "
        inner = s.slice(2, lastTripleIdx)
        tail = s.slice(lastTripleIdx + 2)
        s = inner + tail
      return N3Util.getLiteralValue(s)
    
    class Ingestor
      @instances: {}
      @noodb: null # a static class variable inherited by all subclasses
      @rdCtx: null
      @globalSpogiCount: null
      constructor: (@fullPath, @parent, @callback, noodb, rdCtx) ->
        # Registers this ingestor under @fullPath (throws if one already exists),
        # then stores the NooDB handle and read context and zeroes the counters.
        # NOTE(review): these are assignments on the instance; the class-level
        # `@noodb:`/`@rdCtx:`/`@globalSpogiCount:` entries are properties of the
        # class itself -- confirm which of the two is meant to be shared.
        @thereCanBeOnlyOne(noodb)
        @noodb = noodb unless @noodb?
        @rdCtx = rdCtx unless @rdCtx?
        @globalSpogiCount = 0 unless @globalSpogiCount?
        @localSpogiCount = 0
    
      thereCanBeOnlyOne: (noodb) ->
        # Allow a single ingestor per @fullPath: throws an Error whose message
        # contains ' skipping ' when one is already registered, otherwise logs
        # the creation and registers this instance.
        registry = @constructor.instances
        if registry[@fullPath]?
          throw new Error("thereCanBeOnlyOne() skipping '#{@fullPath}'")
        noodb.log.debug("new #{@constructor.name}(#{@fullPath})")
        registry[@fullPath] = this
    
      setRootPath: (rootPath) ->
        # Remember the root path that getRootPath() hands out.
        @rootPath = rootPath
        return
      getRootPath: -> # climb back to root of Ingestor tree if needed
        # Use our own @rootPath when it is set, otherwise ask the parent.
        return @rootPath if @rootPath
        @parent.getRootPath()
    
      ingest: ->
        # Base behaviour: log which ingestor class is starting on which graph.
        # Subclasses call this through `super` before doing the real work.
        announcement = "#{@constructor.name}('#{@graphTerm}').ingest()"
        @noodb.log.info(announcement)
    
      ensureQuintHasFourTerms: (quint) -> # quint should already have first three terms
        # Fill an empty graph term (index 3) from @graphTerm; throw when it is
        # still empty afterwards.
        quint[3] = @graphTerm unless quint[3] # graph term .g is '' or undefined
        throw new Error("missing .g term in <#{quint.join('┃')}>)") unless quint[3]
    
      ensureQuintHasFiveTerms: (quint) ->
        # Make sure the graph term (index 3) and the index term (index 4) are
        # present, synthesizing the index term when needed, then prefix the
        # index term with 'nrn:'. Returns the prefixed index term.
        @ensureQuintHasFourTerms(quint)
        quint[4] or= @noodb.synthetic_key_factory_next() # index term .i is '' or undefined
        throw new Error("missing .i term in <#{quint.join('┃')}>)") unless quint[4]
        # NOTE(review): the prefix is added unconditionally, also when the term
        # already starts with 'nrn:' -- confirm that is intended.
        indexTerm = quint[4]
        quint[4] = "nrn:#{indexTerm}"
    
      saveQuint: (quint) ->
        # Record the WhoWhen parsed from the index term of `quint` on the NooDB
        # (latest always, earliest only the first time), index the quint and
        # bump both SPOGI counters.
        db = @noodb
        line = quint.join('┃') # only referenced by the disabled check below
        # if line.includes('nrn:')
        #   @noodb.log.alert("saveQuint(<#{line}>) includes 'nrn:'")
        stamp = new WhoWhen()
        db.latest_whowhen = stamp
        stamp.parse(quint[4])
        db.earliest_whowhen = stamp unless db.earliest_whowhen?
        @indexAndMaybeQueueForReading(quint)
        @globalSpogiCount++
        @localSpogiCount++
    
      queueFileForReading: (quint) ->
        # Work out which file the subject of `quint` names, build an ingestor
        # for it and ingest it straight away.
        #   'file:///...' subjects are taken relative to @rdCtx.file_root
        #   subjects not starting with '/' are taken relative to @parent.fullPath
        # Returns false when the file already has an ingestor, else undefined.
        # Throws an Error for subjects which start with '/'.
        subj = quint[0]
        @noodb.log.info("queueFileForReading(#{subj})")
        # if subj.match(/HorsePower/)
        #   @noodb.log.alert("subj = <#{subj}>")
        if subj.startsWith('file:')
          # @noodb.log.warning("#{subj} starts with 'file:'")
          rootRelativePath = subj.replace(/^file:\/\/\//, '')
          filePath = path.join(@rdCtx.file_root, rootRelativePath)
        else if not subj.match(/^\//) # does NOT start with /
          filePath = path.join(@parent.fullPath, subj)
        else
          throw new Error("no filePath yet: "+quint.join('┃'))
        if Ingestor.instances[filePath] # this file has already been queued
          return false
        ingestor = null
        try
          # Hand @rdCtx down, as DirectoryIngestor does; without it the child
          # has no @rdCtx and fails on @rdCtx.file_root for 'file:' subjects.
          ingestor = makeIngestor(filePath, @, null, @noodb, @rdCtx)
        catch e
          if e.toString().includes(' skipping ')
            @noodb.log.info(e)
          else
            @noodb.log.warning(e)
          return
        try
          ingestor.ingest()
        catch e
          console.log(e)
          process.exit(1) # a failed ingest must not exit with the success code
        return
    
      indexAndMaybeQueueForReading: (quint) ->
        # Queue the file named by the quint for reading when it qualifies, then
        # index the quint in the NooDB and return what noodb.index() returns.
        db = @noodb
        shouldQueue = @shouldQueueFileForReading(quint)
        verdict = if shouldQueue then "SHOULD be queued for reading" else "should NOT be queued for reading"
        db.log.debug(quint[0], verdict)
        @queueFileForReading(quint) if shouldQueue
        # if quint[0].match(/NooronActions/)
        #   console.log(@noodb.prefixdb.prefixes.tree())
        return db.index(quint, {blank_prefix: @blank_prefix, base_prefix: @basename})
    
      shouldIgnoreResource: (subj) ->
        # Returns true for resources which should never be ingested: names
        # ending in .DS_Store, ~ or .bak, and emacs save files (#name#).
        # The dots are escaped so they only match a literal '.'; unescaped they
        # matched any character, so e.g. a name ending in 'xbak' was ignored too.
        junkFile = /(\.DS_Store|~|\.bak)$/ # common junk files
        emacsSaveFile = /^\#.*\#$/
        if subj.match(junkFile) or subj.match(emacsSaveFile)
          @noodb.log.debug("shouldIgnoreResource('#{subj}') => true")
          return true
        return false
    
      shouldQueueFileForReading: (quint) ->
        # True only when the subject is not an ignored resource, contains no
        # '#', and the object term ends in ReadableKB or WriteableKB.
        subject = quint[0]
        if @shouldIgnoreResource(subject) or subject.match(/#/)
          # console.log("shouldQueueFileForReading() ==> false because #{quint[0]} should be ignored or contains hash")
          return false
        return quint[2].match(/(ReadableKB|WriteableKB)$/)?
    
      determineGraphTerm: ->
        # Derive @basename, @graphTerm and @graphCURIE from the file name of
        # @fullPath with its extension (if any) removed.
        extension = path.extname(@fullPath)
        @basename = path.basename(@fullPath, extension)
        @graphTerm = "http://nooron.com/_/" + @basename
        # NOTE(review): @graphTerm is a primitive string at this point, so this
        # property assignment probably does not stick (compare the String
        # object used in cleanseN3ObjectWrapper) -- confirm.
        @graphTerm.TYP = 4 # ie CURIE TODO break rsrcidx.TYPS into its own module and require it in this module
        @graphCURIE = 'nrn:' + @basename
        @noodb.log.debug("determineGraphTerm() calculated <#{@graphTerm}> in an unprincipled way")
    
      determineBlankPrefix: ->
        # The blank prefix is the graph basename followed by an underscore.
        @blank_prefix = @basename + '_'
    
      inheritPermissionsFromParent: ->
        # Allege in nrn:permissionsKB that this graph inherits its permissions
        # from the parent's graph; only log when the parent has no graphCURIE.
        unless @parent.graphCURIE
          return @noodb.log.info("#{@graphCURIE} can not inheritPermissionsFromParent because @parent.graphCURIE = #{@parent.graphCURIE}")
        @noodb.allege(@graphCURIE, 'nrn:inheritsPermissionsFrom', @parent.graphCURIE, 'nrn:permissionsKB')
    
    # Common base for the ingestors which read a single file.
    class FileIngestor extends Ingestor
    
      # Allege in nrn:metaKB that this file's graph (@graphCURIE) is a
      # nrn:ReadableKB. Needs @graphCURIE, which determineGraphTerm() sets.
      addGraphToMetaKB: ->
        @noodb.allege(@graphCURIE, 'rdf:type', 'nrn:ReadableKB', 'nrn:metaKB')
    
    class N3FileIngestor extends FileIngestor
      ingest: ->
        # Prepare the graph bookkeeping for this file, then stream it through
        # an N3 parser configured with the subclass's @mimetype. Quads go to
        # @ingestAssertion and prefixes to @ingestPrefix.
        super
        @ext = path.extname(@fullPath)
        # @noodb.log.alert("mimetype = #{@mimetype}")
        @determineGraphTerm()
        @determineBlankPrefix()
        @addGraphToMetaKB()
        @inheritPermissionsFromParent()
        stream = fs.createReadStream(@fullPath)
        @rdfStream = stream
        @parser = N3.Parser(format: @mimetype)
        @parser.parse(stream, @ingestAssertion, @ingestPrefix)
    
      allegeMetaData: ->
        # Allege into nrn:metaKB whichever of the triple count, prefix count
        # and last change time are known for this graph.
        @noodb.allege(@graphCURIE, 'nrn:hasTripleCount', @localSpogiCount, 'nrn:metaKB') if @localSpogiCount?
        @noodb.allege(@graphCURIE, 'nrn:hasPrefixCount', @localPrefixCount, 'nrn:metaKB') if @localPrefixCount?
        @noodb.allege(@graphCURIE, 'nrn:lastChangeTime', @lastChangeTime, 'nrn:metaKB') if @lastChangeTime?
    
      afterReading: ->
        # Called once the parser has no more quads: report the count, record
        # the graph metadata and invoke @callback (with this ingestor as
        # `this`) when one was supplied.
        @noodb.log.notice("#{@localSpogiCount} SPOG read from <#{@fullPath}>")
        @allegeMetaData()
        @callback?.call(this)
    
      ingestAssertion: (error, quad, prefixes) =>
        # Callback given to @parser.parse() by ingest(). Each call carries one
        # quad; a call without a quad ends the read and runs @afterReading().
        # Returns what @saveQuint() returns for the quad, or undefined when
        # nothing was saved.
        # @noodb.log.debug("ingestAssertion(#{error},#{JSON.stringify(quad)},#{prefixes})")
        if error
          # Report parse failures; they used to be dropped silently, which made
          # a broken file look like one that had simply ended.
          @noodb.log.warning("ingestAssertion() failed on <#{@fullPath}>: #{error}")
        if not quad
          @afterReading()
          return
        metaQuads = []
        if quad.subject is ""
          quad.subject = @graphCURIE
          # If the subject is literally blank (ie not a BNode) it is a triple
          # ABOUT the graph itself, eg
          #   <> rdfs:comment "What a commendable little graph!" .
          # REVIEW This is metadata about the graph, so for now put it into
          # the metaKB, too
          metaQuads.push(Object.assign({}, quad, {graph: 'nrn:metaKB'}))
        if quad.subject is '#'
          @noodb.log.info("skipping",quad)
          return
        quint = [quad.subject, quad.predicate, quad.object, quad.graph]
        @cleanseN3ObjectWrapper(quint)
        @ensureQuintHasFiveTerms(quint)
        retval = @saveQuint(quint)
        for metaQuad in metaQuads
          @ingestAssertion(null, metaQuad, null)
        return retval
    
      cleanseN3ObjectWrapper: (quint) ->
        # When the object term (index 2) is an N3 literal, replace it in place
        # with its bare value tagged as a LITERAL.
        objectTerm = quint[2]
        return unless N3Util.isLiteral(objectTerm)
        # a String object rather than a primitive, so the TYP tag can be attached
        wrapped = new String(getAnyLiteralValue(objectTerm))
        wrapped.TYP = 1 # ie LITERAL TODO break rsrcidx.TYPS into its own module and require it in this module
        quint[2] = wrapped
    
      ingestPrefix: (prfx, expandsTo) =>
        # Callback given to @parser.parse() by ingest(): register the prefix
        # with the NooDB prefixdb and count it for this file.
        @noodb.log.debug("ingestPrefix() #{prfx} : <#{expandsTo}>")
        @noodb.prefixdb.add_prefix(prfx, expandsTo)
        @localPrefixCount = 0 unless @localPrefixCount?
        @localPrefixCount++
  • ¶

    @noodb.log.alert(" prefixdb keys: #{Object.keys(@noodb.prefixdb)}")
    @noodb.log.alert(" prefixdb.prefixes keys: #{Object.keys(@noodb.prefixdb.prefixes)}")

    # N3FileIngestor for the file extensions listed in @exts.
    class TurtleFileIngestor extends N3FileIngestor
      # makeIngestor() picks this class when the file extension is in this list
      @exts: ['.ttl','.nt'] # Class member aka static variable
      # handed to N3.Parser as its `format` option by N3FileIngestor.ingest()
      mimetype: 'text/turtle'
    
    # N3FileIngestor for the file extensions listed in @exts.
    class TrigFileIngestor extends N3FileIngestor
      # makeIngestor() picks this class when the file extension is in this list
      @exts: ['.trig'] # Class member aka static variable
      # handed to N3.Parser as its `format` option by N3FileIngestor.ingest()
      mimetype: 'application/trig'
    
    class N5FileIngestor extends FileIngestor
  • ¶

    What is the N5 format? It is a custom Nooron format with these properties:

    • one-line-per-quint format like .nq
    • no PREFIX or BASE lines
    • terms with prefixes DO NOT have surrounding <>
    • literals with carriage returns have them escaped and are surrounded with """
    • prefixes with a trailing underscore indicate that the term is a BNode within the namespace identified by the prefix
    • the five terms are followed by a period
    • the five terms are: 1)subject, 2)predicate 3)object 4)graph 5)allegation-id
    • the allegation-id has parts: a)userId b)userSession c)dateTime d)uniqueNanosec
    • the allegation-id is globally unique

    Examples:

    dvrsdata_:qJCZfEuTD rdf:value "t=12.0,22.9" nrn:dvrsdata nrn:SFM_0_4a72Xa_3dUC9G .
      ingest: ->
        # Set up the graph for this file, then read it line by line; every
        # line is passed to @ingestLine.
        super
        @determineGraphTerm()
        @addGraphToMetaKB()
        reader = new LineByLineReader(@fullPath)
        @lr = reader
        reader.on('error', @error)
        reader.on('line', @ingestLine)
        reader.on('end', @end)
  • ¶

    @graphTerm = path.basename(@fullPath)

      error: (err) =>
        # 'error' handler for the line reader set up in ingest().
        # Everywhere else in this file the log is used through its level
        # methods (debug, info, notice, warning, alert); calling it as a
        # function here would itself fail instead of reporting `err`.
        @noodb.log.error(err)
    
      saveQuint: (quint) ->
        super
        # In the case of reading from a Nooron .n5 file we want to cause the
        # last_user_no_int to monotonically increase so user numbers don't get
        # reused.
        if quint[1] is 'nrn:isNewUserNumber'
          # The number is read from the object term. Parsing quint[1] (the
          # predicate just compared above) always gave NaN, and Math.max() with
          # NaN turned last_user_no_int into NaN.
          # assumes the object term starts with the digits -- TODO confirm the
          # form in which the parser delivers this literal
          userNo = parseInt(quint[2], 10)
          if not isNaN(userNo)
            @noodb.last_user_no_int = Math.max(userNo, @noodb.last_user_no_int ? userNo)
    
      ingestLine: (line) =>
        # 'line' handler for the line reader set up in ingest(): skip blank
        # and comment lines, parse the rest into a quint and save it.
        line = line.trim()
        if line.length is 0 # ignore blank lines
          return
        noodb = @noodb
        # This is the only comment check needed: the former second test for a
        # leading '#' further down could never be reached and logged `quint`
        # before it had been assigned.
        if line.match(/^\s*\#/) # send file comments to DEBUG logger
          noodb.log.debug(line)
          return
        noodb.log.debug("line() #{line}")
        quint = parseN5LineToQuint(line)
        if not quint or quint.length < 3
          msec_since_epoch = noodb.clock.asMsec()
          msg = "no spogi found: #{line}" # .substr(0,78)
          if msec_since_epoch - noodb.read_warn_last_time  > noodb.warn_time_threshold
            noodb.read_warn_last_time = msec_since_epoch
          # Warn exactly once per bad line; previously the same message was
          # logged twice whenever the threshold above had elapsed.
          # NOTE(review): every bad line is still reported, so the threshold
          # does not throttle anything -- confirm whether it should.
          noodb.log.warning(msg)
          return
        @ensureQuintHasFiveTerms(quint)
        @saveQuint(quint)
    
      end: =>
        # 'end' handler for the line reader: log the counts for this file.
        summary = "end() #{@fullPath} localSpogiCount=#{@localSpogiCount} globalSpogiCount:#{@globalSpogiCount}"
        @noodb.log.info(summary)
    
    class NQFileIngestor extends FileIngestor
  • ¶

    What is the NQ format? It is an early attempt at a file format for libnoo which has five terms including an optional id for the quad. Example: <kbs/primordialKB.nq> rdf:type nrn:ReadableKB nrn:primordialKB . # 1_2_1Zm5RM_o99xc

      ingest: ->
        # Set up the graph for this file, then read it line by line; every
        # line is passed to @ingestLine.
        super
        @determineGraphTerm()
        @addGraphToMetaKB()
        lineReader = new LineByLineReader(@fullPath)
        @lr = lineReader
        lineReader.on('error', @error)
        lineReader.on('line', @ingestLine)
        lineReader.on('end', @end)
  • ¶

    @graphTerm = path.basename(@fullPath)

      error: (err) =>
        # 'error' handler for the line reader set up in ingest().
        # Everywhere else in this file the log is used through its level
        # methods (debug, info, notice, warning, alert); calling it as a
        # function here would itself fail instead of reporting `err`.
        @noodb.log.error(err)
    
      saveQuint: (quint) ->
        super
        # In the case of reading from a .nq file we want to cause the
        # last_user_no_int to monotonically increase so user numbers don't get
        # reused.
        if quint[1] is 'nrn:isNewUserNumber'
          # The number is read from the object term. Parsing quint[1] (the
          # predicate just compared above) always gave NaN, and Math.max() with
          # NaN turned last_user_no_int into NaN.
          # assumes the object term starts with the digits -- TODO confirm the
          # form in which the parser delivers this literal
          userNo = parseInt(quint[2], 10)
          if not isNaN(userNo)
            @noodb.last_user_no_int = Math.max(userNo, @noodb.last_user_no_int ? userNo)
    
      ingestLine: (line) =>
        # 'line' handler for the line reader set up in ingest(): skip blank
        # and comment lines, parse the rest into a quint and save it.
        line = line.trim()
        if line.length is 0 # ignore blank lines
          return
        noodb = @noodb
        if line.match(/^\s*\#/) # send file comments to DEBUG logger
          noodb.log.debug(line)
          return
        noodb.log.debug("line() #{line}")
        quint = parseQuadLineToQuint(line)
        # `quint?[0]` because the parse result is only checked for being empty
        # further down; indexing it unguarded failed on lines that do not parse.
        if quint?[0] is '#'
          noodb.log.warning("skipping #{quint}")
          return
        if not quint or quint.length < 3
          msec_since_epoch = noodb.clock.asMsec()
          msg = "no spogi found: #{line}" # .substr(0,78)
          if msec_since_epoch - noodb.read_warn_last_time  > noodb.warn_time_threshold
            noodb.read_warn_last_time = msec_since_epoch
          # Warn exactly once per bad line; previously the same message was
          # logged twice whenever the threshold above had elapsed.
          # NOTE(review): every bad line is still reported, so the threshold
          # does not throttle anything -- confirm whether it should.
          noodb.log.warning(msg)
          return
        @ensureQuintHasFiveTerms(quint)
        @saveQuint(quint)
    
      end: =>
        # 'end' handler for the line reader: log the counts for this file.
        summary = "end() #{@fullPath} localSpogiCount=#{@localSpogiCount} globalSpogiCount:#{@globalSpogiCount}"
        @noodb.log.info(summary)
    
    # Ingests a directory by making and running an ingestor for each entry.
    class DirectoryIngestor extends Ingestor
      ingest: ->
        # For every entry of @fullPath which is not an ignored resource, build
        # an ingestor with makeIngestor() and run it. Entries for which no
        # ingestor can be made are logged and skipped; an exception thrown by
        # an entry's ingest() is printed and terminates the process.
        super
        @determineGraphTerm()
        @inheritPermissionsFromParent()
        @noodb.log.info("DirectoryIngestor.ingest(#{@fullPath})")
        for fileOrDirName in fs.readdirSync(@fullPath)
          if @shouldIgnoreResource(fileOrDirName)
            continue
          # Reset for every entry. The variable lives for the whole function,
          # so when makeIngestor() threw it used to still hold the previous
          # entry's ingestor, which was then ingested a second time.
          ingestor = null
          try
            ingestor = makeIngestor(fileOrDirName, @, null, @noodb, @rdCtx)
          catch e
            if e.toString().includes(' skipping ')
              @noodb.log.info(e)
            else
              @noodb.log.warning(e)
          finally
            if ingestor
              try
                ingestor.ingest()
              catch e
                console.log(e)
                process.exit(1) # a failed ingest must not exit with the success code
        # return # so coffeescript does not put the return in the try

    # Chosen by makeIngestor() for http: and https: URIs. It adds nothing to
    # Ingestor, so its ingest() only writes the base class's log line.
    class URLIngestor extends Ingestor
    
    resolveFilePath = (pathOrUri, parent) ->
      # Resolve `pathOrUri`, as found while ingesting `parent`, to a full path.
      #   file:///a/b        -> a/b below parent.getRootPath()
      #   /path below root   -> returned unchanged
      #   /other/path        -> other/path below the root path
      #   relative/path      -> relative to parent.fullPath
      # Throws an Error for http(s) URIs, for anything else containing ':',
      # and when no path can be determined.
      #
      # The branches are one exclusive chain. They used to be separate tests,
      # so a file:/// URI always ran into the ':' test and was rejected, and a
      # relative name merely starting with 'http' (eg 'httpdocs') was refused.
      rootPath = parent.getRootPath()
      if pathOrUri.startsWith('file:///')
        nooronRootRelativePath = pathOrUri.replace(/^file:\/\/\//,'') # remove file:///
      else if pathOrUri.match(/^https?:/)
        throw new Error("resolveFilePath(<#{pathOrUri}>) should not be called on a non-file URI")
      else if pathOrUri.indexOf(':') > -1
        throw new Error("<#{parent.fullPath}> contains <#{pathOrUri}> bad file path or an unrecognized url scheme")
      else if pathOrUri.startsWith('/')
        if pathOrUri.startsWith(rootPath) # REVIEW under rootPath, so safe, right?
          fullPath = pathOrUri
        else
          nooronRootRelativePath = pathOrUri.replace(/^\//, '')
      else
        nooronRootRelativePath = path.join(parent.fullPath, pathOrUri)
      if not fullPath? and nooronRootRelativePath
        fullPath = path.resolve(rootPath, nooronRootRelativePath)
      parent.noodb.log.debug("nooronRootRelativePath <#{nooronRootRelativePath}> fullPath <#{fullPath}>")
      # test for "no path at all" first so that case gets its own message
      # instead of failing on a method call on undefined
      if not fullPath?
        throw new Error("<#{pathOrUri}> was neither file:/// nor root nor relative path")
      if not fullPath.startsWith('/')
        throw new Error("fullPath must start with / but: #{fullPath}")
      return fullPath
    
    makeIngestor = (pathOrUri, parent, callback, noodb, rdCtx) ->
      # Build the ingestor suited to `pathOrUri`:
      #   http:/https: URI -> URLIngestor
      #   directory        -> DirectoryIngestor
      #   file             -> chosen by extension (.ttl/.nt, .trig, .nq, .n5)
      # Throws an Error whose message contains ' skipping ' for resources that
      # are deliberately not ingested, and other Errors when `parent` has no
      # .noodb, the path cannot be found, or nothing handles it.
      if not parent.noodb?
        throw new Error("#{parent.fullPath} is missing .noodb when pathOrUri=#{pathOrUri}")
      # The dots in the extension tests are escaped so they match a literal
      # '.'; unescaped, '.md' also matched names such as 'cmd'.
      if pathOrUri.match(/(~|\.bak|\.md)$/)
        throw new Error("makeIngestor() skipping <#{pathOrUri}> because it is tedious")
      if pathOrUri.match(/\#/)
        throw new Error("makeIngestor() skipping <#{pathOrUri}> because filename contains #")
      if pathOrUri.match(/\.(rdfs|rdf|xml|json|jsonld)$/)
        throw new Error("makeIngestor() skipping <#{pathOrUri}> not supporting .rdf[s]|.xml")
      # is pathOrUri an url?
      if pathOrUri.match(/^(http:|https:)/)
        cls = URLIngestor
        # The URL itself identifies the ingestor. fullPath used to stay
        # undefined on this branch, so every URLIngestor was constructed (and
        # registered) with an undefined path.
        fullPath = pathOrUri
      else
        try
          fullPath = resolveFilePath(pathOrUri, parent)
          stats = fs.statSync(fullPath)
        catch err
          console.log("FAILING WHILE LOOKING IN", parent.fullPath, 'FOR', pathOrUri)
          throw new Error("can't find #{pathOrUri} in #{parent.fullPath}")
        if stats.isDirectory()
          cls = DirectoryIngestor
        else if stats.isFile()
          ext = path.extname(fullPath)
          if ext in TurtleFileIngestor.exts
            cls = TurtleFileIngestor
          else if ext in TrigFileIngestor.exts
            cls = TrigFileIngestor
          else if ext in ['.nq']
            cls = NQFileIngestor
          else if ext in ['.n5']
            cls = N5FileIngestor
      if not cls
        throw new Error("#{fullPath} (#{ext}) is neither an URL, a file or a directory")
      return new cls(fullPath, parent, callback, noodb, rdCtx)
    
    # Publish the public names on `exports` when it exists, else on `this`.
    moduleExports = exports ? this
    moduleExports.makeIngestor = makeIngestor
    moduleExports.Ingestor = Ingestor
    moduleExports.getAnyLiteralValue = getAnyLiteralValue