• Jump To … +
    server.coffee src/actionknob.coffee src/autosem.coffee src/bitbucket_kba.coffee src/browserlog.coffee src/datareduction.coffee src/dci.coffee src/dciknob.coffee src/deeseeeye.coffee src/dnd.coffee src/doof.coffee src/formurla-mngr.coffee src/fractalpanel.coffee src/fractalpanel_test.coffee src/front.coffee src/ingestor.coffee src/kbabitbucket.coffee src/knobctrl.coffee src/lib_test.coffee src/nanoclock.coffee src/noodb.coffee src/noodbabstract.coffee src/noodbbrowser.coffee src/noodbbrowser_test.coffee src/noodbsec.coffee src/noorauth.coffee src/noorplugin.coffee src/noorquery.coffee src/noorvm.coffee src/noorwrite.coffee src/quadparser.coffee src/quadparsern3.coffee src/rbac.coffee src/reactor.coffee src/rebase.coffee src/rsrcidx.coffee src/sandboxactions.coffee src/screen_ctx.coffee src/spogi.coffee src/tabular_widget.coffee src/visctrl.coffee src/voicesknob.coffee src/whowhen.coffee src/xsd2native.coffee
  • quadparser.coffee

  • ¶

    based on https://github.com/talis/rdfquads.js

    _ = (window? and window._) or require('underscore')
    toNative = require('./xsd2native').toNative
    
    # Matches an angle-bracketed term anywhere in a string.
    # Group 1 is the text between "<" and ">" (may be empty).
    uriRegex = /<([^>]*)>/
    # Recognises a literal. The pattern has two top-level alternatives:
    #   1. ^([-]?\d.*)   an optional minus, one digit, then anything (group 1).
    #                    The "^" anchor belongs to this alternative only.
    #   2. "(.*)" followed by @lang (group 3), ^^datatype (group 4) or nothing.
    #                    Group 2 is the (greedy) text between the quotes; the "$"
    #                    anchor belongs to this alternative only.
    # The language tag accepts lowercase ASCII letters only.
    literalRegex = /^([-]?\d.*)|(?:\"(.*)\")(?:\@([a-z]+)|\^\^(.*)|(?:))$/
    
    # Holds a URI-like term with any surrounding angle brackets removed.
    class RdfUri
      # url: the term text. When it contains "<...>" only the bracketed text is
      # kept in @raw; otherwise the text is stored unchanged.
      constructor: (url) ->
        bracketed = url.match(uriRegex)
        @raw = if bracketed then bracketed[1] else url
        return
      # Returns the stored text.
      toString: -> @raw
    
    # The object term of a statement: either an angle-bracketed URI or a literal
    # (a quoted string with an optional @lang / ^^datatype suffix, or a
    # number-like token).
    #
    # Fields set by the constructor:
    #   @raw          the matched source text, brackets/quotes and suffix included
    #   @value        the text inside the brackets or quotes, or the numeric token
    #   @type         "uri", "literal" or "FAILED"
    #   @where        debugging tag naming the constructor branch that ran
    #   @isNum        true when the number-like alternative matched
    #   @literal_lang the language tag, when present
    #   @literal_type the datatype text after "^^", when present
    class RdfObject
      constructor: (val) ->
        # `val?` rejects null and undefined. The earlier `typeof val isnt undefined`
        # compared a string with undefined, so it was always true and a missing
        # value crashed on `val.match`.
        if val? and val.match
          uriMatch = val.match(uriRegex)
          # console.log "WE ARE HERE",val
          if uriMatch
            # console.log "uriMatch",uriMatch
            @raw = uriMatch[0]
            @value = uriMatch[1]
            @type = "uri"
            @where = "A"
          else
            literalMatch = val.match(literalRegex)
            # console.log "literalMatch:", literalMatch, ">#{val}<"
            if literalMatch
              @raw = literalMatch[0]
              @type = 'literal'
              @where = "B"
              if literalMatch[1] and literalMatch[1].indexOf('-') < 1 # starts with digit
                @isNum = true
                @value = literalMatch[1]
              if literalMatch[2] # wrapped in quotes
                @value = literalMatch[2]
              if literalMatch[3] # @lang
                @where += "_lang"
                @literal_lang = literalMatch[3]
              if literalMatch[4] # ^^literalType
                @where += "_type"
                @literal_type = literalMatch[4]
            else
              # console.log "literalRegex failed for", val
              @raw = val
              @type = 'FAILED'
              @value = val
              @where = "C_literalRegex failed"
        else
          @raw = val  # maybe an integer?  TODO tighten this up big time!
          @type = "literal"

      # Returns toNative's conversion of the value, caching it in @ntval.
      # When toNative yields null/undefined the unconverted @value is returned
      # and nothing is cached.
      getNativeValue: ->
        if not @ntval?
          @ntval = toNative(@value,@literal_type,@isNum,@raw)
          if not @ntval?
            return @value
        return @ntval

      # Returns the matched source text.
      toString: ->
        @raw

      # Serialises the term for output (used by Quad::toString).
      # Throws Error when the term is neither a URI nor a string/number literal.
      repr: ->
        if @type is 'literal'
          # @raw already carries its own quotes and @lang / ^^type suffix (or is
          # a bare number), so it is emitted as is; re-quoting it produced
          # doubled quotes.
          if _.isString(@raw) or _.isNumber(@raw)
            return @raw
          else
            throw new Error "RdfObject(#{@raw}) is a literal which is neither a String nor a Number"
        else if @type is 'uri'
          # @raw includes the angle brackets, so wrap the bare @value instead.
          return "<#{@value}>"
        else
          throw new Error "RdfObject(#{@raw}).type is neither literal nor uri"

      isUri: ->
        @type is "uri"

      isLiteral: ->
        @type is "literal"

      getLiteralType: ->
        @literal_type

      # Literals come back wrapped in a one-element array, everything else bare.
      uri_or_literal: ->
        if @isLiteral()
          return [@raw]  # TODO remove this and the complementary unwrapping of the array
        else
          return @raw # "<#{@raw}>"

      # Literals yield @raw; every other type yields @value.
      getRawOrValue: ->
        if @isLiteral()
          return @raw
        else
          return @value
    
    # One statement: subject, predicate, object and graph terms plus an id.
    class Quad # TODO change to SPOGI
      # subject, pred and graph are wrapped in RdfUri, obj in RdfObject;
      # id is stored as given.
      constructor: (subject, pred, obj, graph, id) ->
        @s = new RdfUri(subject)
        @p = new RdfUri(pred)
        @o = new RdfObject(obj)
        @g = new RdfUri(graph)
        @i = id
        return

      # Renders the statement on one line; " # <id>" is appended only when an
      # id is present (not null/undefined).
      toString: ->
        idSuffix = if @i? then " # #{@i}" else ""
        "<#{@s}> <#{@p}> #{@o.repr()} <#{@g}> .#{idSuffix}"

      # Same text as toString.
      repr: -> @toString()

      # toString plus a trailing newline.
      asLine: -> "#{@toString()}\n"
    
    # Matches one quad line: subject, predicate, object, graph, "." and a
    # trailing id. Capture groups:
    #   1 subject   2 predicate   3 object   4 graph (may be empty)   5 id
    # The pattern is not anchored at the start of the line.
    spogiRegex = ///

      \s*                          # zero or more blank spaces

      # the subject
         (<[^>]*>                  #   an uri
          |_:[A-Za-z][A-Za-z0-9]*) #   'blank' identifier

      \s+                          # at least one blank space

      # the predicate
         (<[^>]*>                  #   an uri TODO require content
          |_:[A-Za-z][A-Za-z0-9]*) #   'blank' identifier

      \s+                          # at least one blank space

      # the object
         (
          (?:[-]?\d[^\s]*)         #   a numbery-type thingy or date
          |(?:\"(?:.*)\")          #   a stringy-type thingy
           (?:\@(?:[a-z_]+)        #     with language (optional)
           |\^\^(?:.*)             #     or type (optional)
           |(?:)                   #     or neither
           )
          |(?:<[^>]*>|)            #   or uri
         )

      \s+                          # at least one blank space
         (<[^>]*>|)                # the (optional) graph
      \s*\.                        # the quad-concluding period (.)


      # the optional nooron-style id for the spogi
      \s*\#*\s*(
        .*       # TODO replace this wildcard with the following, once debugged
       # NOTE(review): the four sub-patterns below are active, so the id has to
       # contain three underscores; the TODO above suggests they may have been
       # meant to stay disabled -- confirm.
       [A-Za-z0-9]*_       #   user_symbol (actually should NOT have leading digits)
       [A-Za-z0-9]*_       #   session_no
       [A-Za-z0-9]*_       #   sec since unix epoch
       [A-Za-z0-9]*        #   nsec part of time

                )$                 # TODO permit subsequent comments after a space
      ///
    
    
  • ¶

    https://regex101.com/r/A48eDR/1/ this captures everything into a single group with (?:

    # Matches an object term: a quoted string with an optional language or
    # datatype suffix, or a bare CURIE / <uri>. The pattern is unanchored.
    # Capture groups:
    #   1 the whole term
    #   2 the quoted string together with its suffix
    #   3 the suffix (empty when there is none)
    #   4 the language spec, exactly "@" plus two lowercase letters
    #   5 "^^" plus the datatype
    #   6 the datatype (CURIE or <uri>)
    #   7 a bare CURIE or <uri>
    ttlObjectRegex = ///
    
      (
        (\"                        # the start of a quoted region
         .*                        # anything, greedy (but what about quote escapes or CR escapes?)
         \"                        # the end of a quoted region
          (                        # optionally, followed by
            (\@[a-z][a-z])           # a language spec eg @en or @de
           |
           (\^\^                     # a datatype spec, a pair of carets followed by a curie or url
             ([\w]+\:[\w]*             #   a curie (with a possible blank local part)
              |<[^>]+>                 #   an uri
              )
            )
           |                         # or nothing
          )
        )
       |
         ([\w]+\:[\w]*             #   a curie (with a possible blank local part)
          |<[^>]+>                 #   an uri
          )
      )
    
    ///
  • ¶

    https://regex101.com/r/FJVaaM/5/

    # Matches one complete N5 line: subject, predicate, object, graph, ".",
    # "#" and a nooron-style id, optionally followed by more text.
    # Capture groups:
    #    1 subject            2 predicate
    #    3 the whole object   4 quoted string together with its suffix
    #    5 the suffix (empty when there is none)
    #    6 language spec      7 "^^" plus datatype     8 datatype
    #    9 graph             10 id
    n5Regex = ///
      ^                            # the beginning of the line

      \s*                          # zero or more blank spaces

      # the subject
         ([\w]+\:[\w\-]*             #   a curie (with a possible blank local part)
          |<[^>]+>                 #   an uri
          )

      \s+                          # at least one blank space

      # the predicate
         ([\w]+\:[\w\-]*             #   a curie (with a possible blank local part)
          |<[^>]+>                 #   an uri
          )

      \s+                          # at least one blank space

      # the object (consisting of either a quoted and either typed or untyped string or an URI)
      (                            # the whole object  GROUP 3
        (\"                        # the start of a quoted region GROUP 4
         .*                        # anything (but what about quote escapes or CR escapes?)
         \"                        # the end of a quoted region
          (                        # optionally, followed by  GROUP 5
            (\@[a-z][a-z])           # a language spec eg @en or @de  GROUP 6
           |
            (\^\^                     # a datatype spec, a pair of carets followed by a curie or url  GROUP 7
             ([\w]+\:[\w\-]*             #   a curie (with a possible blank local part) GROUP 8
              |<[^>]+>                 #   an uri
              )
             )
           |                         # or nothing
          )
        )
       |
         (?:[\w]+\:[\w\-]*             #   a curie (with a possible blank local part) (?: match everything enclosed
          |<[^>]+>                 #   an uri
          )
      )

      \s+                          # at least one blank space

      # the graph (required)
         ([\w]+\:[\w\-]*             #   a curie (with a possible blank local part)  GROUP 9
          |<[^>]+>                 #   an uri
          )

      \s+                          # at least one blank space
      \.                           # the trailing period
      \s+                          # at least one blank space
      \#                           # the TTL/TRIG comment character
      \s+                          # at least one blank space

        ([\w]+)                    # a mandatory nooron-style id like SFM_crm114_bfg995_pdq357   GROUP 10
      # TODO replace the above wildcard with the following, once debugged
      #   [A-Za-z0-9]*_       user_symbol (actually should NOT have leading digits)
      #   [A-Za-z0-9]*_       session_no
      #   [A-Za-z0-9]*_       sec since unix epoch
      #   [A-Za-z0-9]*        nsec part of time

      .*                           # permit subsequent comments after a space
      $                            # the end of the line
      ///
    
    # Matches a line whose first non-blank characters are "//".
    isComment = /^\s*\/\//
    
    # Parses one quad line into a plain {s, p, o, g, i} record.
    # Returns null for a missing, empty or "//"-comment line. When spogiRegex
    # does not match, the line is logged and the result of console.log is returned.
    parseQuadLine = (line) ->
      return null if not line? or line is "" or line.match(isComment)
      parts = line.match(spogiRegex)
      return console.log("spogiRegex FAILED:", line) unless parts
      [whole, subjText, predText, objText, graphText, idText] = parts
      return {
        s: new RdfUri(subjText.trim()).raw
        p: new RdfUri(predText.trim()).raw
        o: new RdfObject(objText.trim()).uri_or_literal()
        g: new RdfUri(graphText.trim()).raw
        i: idText.trim()
      }
    
    # Parses one line into {s, p, o, g, i} where s, p and g are RdfUri
    # instances, o is an RdfObject and i is the trimmed id text.
    # Returns null for a missing, empty or "//"-comment line. When spogiRegex
    # does not match, the line is logged and the result of console.log is returned.
    parseQuintLineToPenta = (line) ->
      return null if not line? or line is "" or line.match(isComment)
      parts = line.match(spogiRegex)
      return console.log("spogiRegex FAILED:", line) unless parts
      [whole, subjText, predText, objText, graphText, idText] = parts
      return {
        s: new RdfUri(subjText.trim())
        p: new RdfUri(predText.trim())
        o: new RdfObject(objText.trim())
        g: new RdfUri(graphText.trim())
        i: idText.trim()
      }
    
    # Parses one quad line into a five-element array:
    # [subject, predicate, object, graph, id].
    # Returns null for a missing, empty or "//"-comment line. When spogiRegex
    # does not match, the line is logged and the result of console.log is returned.
    parseQuadLineToQuint = (line) ->
      return null if not line? or line is "" or line.match(isComment)
      parts = line.match(spogiRegex)
      return console.log("spogiRegex FAILED:", line) unless parts
      [whole, subjText, predText, objText, graphText, idText] = parts
      return [
        new RdfUri(subjText.trim()).raw
        new RdfUri(predText.trim()).raw
        new RdfObject(objText.trim()).getRawOrValue()
        new RdfUri(graphText.trim()).raw
        idText.trim()
      ]
    
    # Parses one N5 line into a five-element array:
    # [subject, predicate, object, graph, id].
    # Returns null for a missing, empty or "//"-comment line. When n5Regex does
    # not match, the line is logged and the result of console.log is returned.
    parseN5LineToQuint = (line) ->
      if not line? or line is "" or line.match(isComment)
        null
      else
        match = line.match(n5Regex)
        if match
          # TODO make full use of the hints in these other groups to perfect persistence
          #   group 3 is the whole object term (quoted string with suffix, CURIE or <uri>)
          #   group 4 is the quoted string together with its suffix (when present)
          #   group 5 is the suffix after the closing quote (empty when absent)
          #   group 6 is the language spec (when present) eg '@en'
          #   group 7 is '^^' plus the datatype (when present) eg '^^xsd:int'
          #   group 8 is the datatype spec (when present) eg 'xsd:int' or an <uri>
          noodbId = match[10].trim()
          retval = [
            new RdfUri(match[1].trim()).raw
            new RdfUri(match[2].trim()).raw
            new RdfObject(match[3].trim()).getRawOrValue()
            new RdfUri(match[9].trim()).raw
            noodbId]
          return retval
        else
          console.log "n5Regex FAILED:", line
    
    
    # Publish the public names on `exports` when it exists, otherwise on `this`.
    exportTarget = exports ? this
    exportTarget.parseQuadLine = parseQuadLine  # deprecated
    exportTarget.parseQuintLineToPenta = parseQuintLineToPenta # deprecated
    exportTarget.parseQuadLineToQuint = parseQuadLineToQuint
    exportTarget.parseN5LineToQuint = parseN5LineToQuint
    exportTarget.ttlObjectRegex = ttlObjectRegex

    exportTarget.Quad = Quad
    exportTarget.RdfObject = RdfObject