▲
/
pw
ruby
URI.rb
# Blank nodes can be cast to a WebResource through their string form.
class RDF::Node
  def R
    WebResource.new(to_s)
  end
end
# RDF URIs can be cast to a WebResource through their string form.
class RDF::URI
  def R
    WebResource.new(to_s)
  end
end
# Any string can be cast to a WebResource that uses the string as its identifier.
class String
  def R
    WebResource.new(self)
  end
end

# shutoff /etc/hosts lookup: the default resolver list is replaced by a single
# DNS resolver that queries the nameserver 1.1.1.1
Resolv::DefaultResolver.replace_resolvers([Resolv::DNS.new(:nameserver => '1.1.1.1')])

class WebResource < RDF::URI

  # call #R to cast to WebResource (paths, URI strings, Hash with 'uri' field);
  # on a WebResource the cast is the identity
  def R
    self
  end

  # enable R[] constructor syntax
  def self.[](u)
    WebResource.new(u)
  end

  # #uri is a synonym for #to_s
  alias_method :uri, :to_s

  module URIs
    # Append u (anything with #to_s) to this URI's string form; the result is a new resource.
    def +(u)
      R[to_s + u.to_s]
    end

    # Match a pattern against this URI's string form.
    def match(p)
      to_s.match(p)
    end

    # URI constants: vocabulary bases and the terms built from them
    W3 = 'http://www.w3.org/'
    OA = 'https://www.w3.org/ns/oa#'
    Purl = 'http://purl.org/'
    DC = Purl + 'dc/terms/'
    DCe = Purl + 'dc/elements/1.1/'
    SIOC = 'http://rdfs.org/sioc/ns#'
    Link = DC + 'link'
    Schema = 'http://schema.org/'
    Media = 'http://search.yahoo.com/mrss/'
    Podcast = 'http://www.itunes.com/dtds/podcast-1.0.dtd#'
    Comments = 'http://wellformedweb.org/CommentAPI/commentRss'
    Sound = Purl + 'ontology/mo/Sound'
    Image = DC + 'Image'
    Video = DC + 'Video'
    RSS = Purl + 'rss/1.0/'
    Date = DC + 'date'
    Title = DC + 'title'
    Abstract = DC + 'abstract'
    Post = SIOC + 'Post'
    To = SIOC + 'addressed_to'
    From = SIOC + 'has_creator'
    Creator = SIOC + 'has_creator' # same term as From
    Content = SIOC + 'content'
    BlogPost = SIOC + 'BlogPost'
    InstantMessage = SIOC + 'InstantMessage'
    Resource = W3 + '2000/01/rdf-schema#Resource'
    Stat = W3 + 'ns/posix/stat#'
    Atom = W3 + '2005/Atom#'
    Type = W3 + '1999/02/22-rdf-syntax-ns#type'
    Label = W3 + '2000/01/rdf-schema#label'
    Size = Stat + 'size'
    Mtime = Stat + 'mtime'
    Container = W3 + 'ns/ldp#Container'
    Contains = W3 + 'ns/ldp#contains'

  end
  module HTTP

    ## short-URI resolution, cached with no expiry (do any major services allow editing?)
    # Handler taking a request object (re) that provides #env, #path and #htmlDocument.
    # Returns a [status, headers, body] triple: always 200 with an HTML document
    # describing source -> dest, where dest is nil if the short URI did not resolve.
    Short = -> re {
      host = re.env['HTTP_HOST']
      source = re.env['rack.url_scheme'] + '://' + host + re.path

      # cache location is derived from the host and the requested path
      cache = R['/.cache/' + host + (re.path[0..2] || '') + '/' + (re.path[3..-1] || '') + '.u']
      if cache.exist?
        dest = cache.readFile
      else
        # the Location header of the upstream response is the expansion (nil when absent)
        dest = Net::HTTP.get_response(URI.parse(source))['location']
        # the cache never expires, so only a successful resolution may be stored in it
        cache.writeFile dest if dest
        puts "#{re.path[1..-1]} -> #{dest}"
      end

      [200, {'Content-Type' => 'text/html'}, [re.htmlDocument({source => {'dest' => dest ? dest.R : nil}})]]}

  end
  module Webize

    # Emit triples for a URI-list file: each non-blank line is "<uri> [title words...]".
    # addHost - when true, every listed URI is prefixed with "https://<list basename>/"
    # Yields (subject, predicate, object): a DC List typing and title for the list
    # itself, then a Resource typing (and a title, if present) for each listed URI.
    def triplrUriList addHost = false
      base = stripDoc
      name = base.basename

      # list resource
      yield base.uri, Type, R[DC+'List']
      yield base.uri, Title, name
      prefix = addHost ? "https://#{name}/" : ''

      # lines; the block form of File.open closes the handle once the file is read
      File.open(localPath){|f| f.readlines}.each{|line|
        t = line.chomp.split ' '
        next if t.empty? # blank line

        uri = prefix + t[0]
        title = t[1..-1].join ' ' if t.size > 1

        # triples
        yield uri, Type, R[Resource]
        yield uri, Title, title if title}
    end
  end

end
AdHoc.rb
=begin
 many ways to direct traffic to proxy:

 (port 443) $ setcap 'cap_net_bind_service=+ep' `realpath /usr/bin/python3`
 * default-gateway setting
 * iptables/routing configuration 
 * concatenate proxyhosts to /etc/hosts
 * see mitmproxy/Squid/Solid docs for ideas
 (arbitrary high-port i.e. 8000)
 * browser proxy-settings
 * OS proxy-settings

proxy
 mitmproxy -p 443 --showhost -m reverse:http://localhost:8000 --set keep_host_header=true
 
certificates
 cd ~/.mitmproxy/
 openssl x509 -inform PEM -subject_hash_old -in mitmproxy-ca-cert.pem
 su -c 'ln mitmproxy-ca-cert.pem /oreo/system/etc/security/cacerts/c8750f0d.0' # adjust to match above command's hashed-value

=end

class WebResource
  module HTTP

    # URI shorteners: each listed host is served by the Short handler
    %w{bit.ly bos.gl cfl.re ift.tt n.pr nyti.ms ow.ly t.co trib.al w.bos.gl}.each do |shortener|
      Host[shortener] = Short
    end

    # URI encoded in URI: redirect to the value of the 'u' query argument
    Host['l.instagram.com'] = lambda do |re|
      [302, {'Location' => re.q['u']}, []]
    end

    # nonlocal fonts, redirect to local
    fontHandler = lambda do |re|
      location = '/.conf/font.woff'
      if re.path == location
        re.fileResponse # the local font itself
      elsif re.path == '/css'
        [200, {'Content-Type' => 'text/css'}, ['body {background-color: pink !important}']]
      else
        [301, {'Location' => location, 'Access-Control-Allow-Origin' => '*'}, []]
      end
    end
    Host['fonts.googleapis.com'] = fontHandler
    Host['fonts.gstatic.com'] = fontHandler

  end
end

# additions to stdlib classes:
# #do conditionally binds var + runs block on non-nil arguments
# #justArray maps nil -> [] and obj -> [obj]
# #intersperse is borrowed from Haskell
class Array
  # an Array already is an Array
  def justArray
    self
  end

  # new Array with i placed between every pair of neighbouring elements
  def intersperse(i)
    flat_map { |element| [element, i] }[0..-2]
  end
end

class FalseClass
  # false never runs the block; it is returned as-is
  def do
    self
  end
end

class NilClass
  # nil maps to the empty Array
  def justArray
    []
  end

  # nil never runs the block; it is returned as-is
  def do
    self
  end
end

class Object
  # wrap a single object in an Array
  def justArray
    [self]
  end

  # identity
  def id
    self
  end

  # run the block with this object as its argument, returning the block's value
  def do
    yield self
  end

  # Time and DateTime instances are returned as-is, anything else is handed to Time.parse
  def to_time
    return self if [Time, DateTime].member?(self.class)
    Time.parse(self)
  end
end
Gemfile.lock
GEM
  remote: https://rubygems.org/
  specs:
    addressable (2.5.1)
      public_suffix (~> 2.0, >= 2.0.2)
    bcp47 (0.3.3)
      i18n
    builder (3.2.3)
    coderay (1.1.1)
    concurrent-ruby (1.0.5)
    daemons (1.2.4)
    dimensions (1.3.0)
    ebnf (1.1.1)
      rdf (~> 2.2)
      sxp (~> 1.0)
    equivalent-xml (0.6.0)
      nokogiri (>= 1.4.3)
    eventmachine (1.2.3)
    foreman (0.84.0)
      thor (~> 0.19.1)
    haml (5.0.1)
      temple (>= 0.8.0)
      tilt
    hamster (3.0.0)
      concurrent-ruby (~> 1.0)
    htmlentities (4.3.4)
    i18n (0.8.6)
    icalendar (2.4.1)
    json-ld (2.1.5)
      multi_json (~> 1.12)
      rdf (~> 2.2)
    kgio (2.11.0)
    ld-patch (0.3.1)
      ebnf (~> 1.0, >= 1.0.1)
      rdf (~> 2.0)
      rdf-xsd (~> 2.0)
      sparql (~> 2.0)
      sxp (~> 1.0)
    link_header (0.0.8)
    linkeddata (2.2.2)
      equivalent-xml (~> 0.6)
      json-ld (~> 2.1)
      ld-patch (~> 0.3)
      nokogiri (~> 1.7)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.1)
      rdf-isomorphic (~> 2.0)
      rdf-json (~> 2.0)
      rdf-microdata (~> 2.1)
      rdf-n3 (~> 2.1)
      rdf-normalize (~> 0.3)
      rdf-rdfa (~> 2.1)
      rdf-rdfxml (~> 2.0)
      rdf-reasoner (~> 0.4)
      rdf-tabular (~> 2.2)
      rdf-trig (~> 2.0)
      rdf-trix (~> 2.0)
      rdf-turtle (~> 2.2)
      rdf-vocab (~> 2.1)
      rdf-xsd (~> 2.1)
      sparql (~> 2.1)
      sparql-client (~> 2.1)
    mail (2.6.6)
      mime-types (>= 1.16, < 4)
    method_source (0.8.2)
    mime-types (3.1)
      mime-types-data (~> 3.2015)
    mime-types-data (3.2016.0521)
    mini_portile2 (2.2.0)
    multi_json (1.12.1)
    net-http-persistent (2.9.4)
    nokogiri (1.8.0)
      mini_portile2 (~> 2.2.0)
    nokogiri-diff (0.2.0)
      nokogiri (~> 1.5)
      tdiff (~> 0.3, >= 0.3.2)
    pry (0.10.4)
      coderay (~> 1.1.0)
      method_source (~> 0.8.1)
      slop (~> 3.4)
    pry-doc (0.10.0)
      pry (~> 0.9)
      yard (~> 0.9)
    public_suffix (2.0.5)
    rack (2.0.3)
    raindrops (0.18.0)
    rdf (2.2.6)
      hamster (~> 3.0)
      link_header (~> 0.0, >= 0.0.8)
    rdf-aggregate-repo (2.2.0)
      rdf (~> 2.0)
    rdf-isomorphic (2.0.0)
      rdf (~> 2.0)
    rdf-json (2.0.0)
      rdf (~> 2.0)
    rdf-microdata (2.2.1)
      htmlentities (~> 4.3)
      nokogiri (~> 1.7)
      rdf (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-n3 (2.1.0)
      rdf (~> 2.0)
    rdf-normalize (0.3.2)
      rdf (~> 2.0)
    rdf-rdfa (2.2.2)
      haml (~> 5.0)
      htmlentities (~> 4.3)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-rdfxml (2.0.0)
      htmlentities (~> 4.3)
      rdf (~> 2.0)
      rdf-rdfa (~> 2.0)
      rdf-xsd (~> 2.0)
    rdf-reasoner (0.4.2)
      rdf (~> 2.2)
      rdf-vocab (~> 2.2)
      rdf-xsd (~> 2.1)
    rdf-tabular (2.2.0)
      addressable (~> 2.3)
      bcp47 (~> 0.3, >= 0.3.3)
      json-ld (~> 2.0)
      rdf (~> 2.1)
      rdf-vocab (~> 2.0)
      rdf-xsd (~> 2.0)
    rdf-trig (2.0.0)
      ebnf (~> 1.0, >= 1.0.1)
      rdf (~> 2.0)
      rdf-turtle (~> 2.0)
    rdf-trix (2.0.0)
      rdf (~> 2.0)
    rdf-turtle (2.2.0)
      ebnf (~> 1.1)
      rdf (~> 2.2)
    rdf-vocab (2.2.3)
      rdf (~> 2.2)
    rdf-xsd (2.2.0)
      rdf (~> 2.1)
    redcarpet (3.4.0)
    slop (3.6.0)
    sparql (2.2.1)
      builder (~> 3.2)
      ebnf (~> 1.1)
      rdf (~> 2.2)
      rdf-aggregate-repo (~> 2.2)
      rdf-xsd (~> 2.1)
      sparql-client (~> 2.1)
      sxp (~> 1.0)
    sparql-client (2.1.0)
      net-http-persistent (~> 2.9)
      rdf (~> 2.0)
    sxp (1.0.0)
      rdf (~> 2.0)
    tdiff (0.3.3)
    temple (0.8.0)
    thin (1.7.2)
      daemons (~> 1.0, >= 1.0.9)
      eventmachine (~> 1.0, >= 1.0.4)
      rack (>= 1, < 3)
    thor (0.19.4)
    tilt (2.0.7)
    unicorn (5.3.0)
      kgio (~> 2.6)
      raindrops (~> 0.7)
    yard (0.9.9)

PLATFORMS
  ruby

DEPENDENCIES
  dimensions
  foreman
  icalendar
  linkeddata
  mail
  nokogiri
  nokogiri-diff
  pry
  pry-doc
  rack
  redcarpet
  thin
  unicorn

BUNDLED WITH
   1.15.1
install
#!/usr/bin/env ruby
require 'fileutils'
require 'pathname'
# Link ww.rb (located next to this script) into Ruby's site-library directory.
loc = RbConfig::CONFIG["sitelibdir"] + '/'
FileUtils.mkdir_p loc # no-op when the directory already exists
# force: true replaces a link left behind by an earlier run, so the installer can be re-run
FileUtils.ln_s File.join(File.expand_path(File.dirname(__FILE__)), 'ww.rb'), loc, force: true
Calendar.rb
class WebResource
  module Webize
# TODO ICal
    # Parse the iCalendar file at localPath and print its first calendar and that
    # calendar's first event. No triples are emitted yet (see TODO above).
    # Returns nil.
    def triplrCalendar
      # block form closes the file handle once parsing is done
      cals = File.open(localPath){|cal_file| Icalendar::Calendar.parse(cal_file)}
      cal = cals.first
      return unless cal # file contained no calendar
      puts cal
      event = cal.events.first
      puts event
    end
  end
end
MIME.rb
# coding: utf-8
class WebResource
  module MIME
    include URIs

    # name prefix -> MIME
    # (#mime looks up the lowercased basename part before the first dot)
    MIMEprefix = {
      'authors' => 'text/plain',
      'changelog' => 'text/plain',
      'contributors' => 'text/plain',
      'copying' => 'text/plain',
      'dockerfile' => 'text/x-docker',
      'gemfile' => 'text/x-ruby',
      'license' => 'text/plain',
      'makefile' => 'text/x-makefile',
      'todo' => 'text/plain',
      'unlicense' => 'text/plain',
      'msg' => 'message/rfc822'
    }

    # name suffix -> MIME
    # (#mime looks up the lowercased file extension, without its dot)
    MIMEsuffix = {
      'asc' => 'text/plain',
      'atom' => 'application/atom+xml',
      'bat' => 'text/x-batch',
      'bu' => 'text/based-uri-list',
      'cfg' => 'text/ini',
      'chk' => 'text/plain',
      'conf' => 'application/config',
      'dat' => 'application/octet-stream',
      'db' => 'application/octet-stream',
      'desktop' => 'application/config',
      'doc' => 'application/msword',
      'docx' => 'application/msword+xml',
      'e' => 'application/json',
      'eot' => 'application/font',
      'go' => 'application/go',
      'haml' => 'text/plain',
      'hs' => 'application/haskell',
      'in' => 'text/x-makefile',
      'ini' => 'text/ini',
      'ino' => 'application/ino',
      'lisp' => 'text/x-lisp',
      'list' => 'text/plain',
      'log' => 'text/chatlog',
      'mbox' => 'application/mbox',
      'md' => 'text/markdown',
      'msg' => 'message/rfc822',
      'opml' => 'text/xml+opml',
      'rb' => 'text/x-ruby',
      'rst' => 'text/restructured',
      'ru' => 'text/x-ruby',
      'sample' => 'application/config',
      'sh' => 'text/x-shellscript',
      'terminfo' => 'application/config',
      'tmp' => 'application/octet-stream',
      'ttl' => 'text/turtle',
      'u' => 'text/uri-list',
      'webp' => 'image/webp',
      'woff' => 'application/font',
      'yaml' => 'text/plain'
    }

    # MIME -> RDF-yielding function
    # each value is [method name, extra arguments...], as passed to #send by #transcode
    Triplr = {
      'application/config' => [:triplrDataFile],
      'application/font' => [:triplrFile],
      'application/go' => [:triplrCode],
      'application/haskell' => [:triplrCode],
      'application/javascript' => [:triplrCode],
      'application/ino' => [:triplrCode],
      'application/json' => [:triplrDataFile],
      'application/mbox' => [:triplrMbox],
      'application/octet-stream' => [:triplrFile],
      'application/org' => [:triplrOrg],
      'application/pdf' => [:triplrFile],
      'application/msword' => [:triplrWordDoc],
      'application/msword+xml' => [:triplrWordXML],
      'application/pkcs7-signature' => [:triplrFile],
      'application/rtf' => [:triplrRTF],
      'application/ruby' => [:triplrCode],
      'application/sh' => [:triplrCode],
      'application/x-sh' => [:triplrCode],
      'application/xml' => [:triplrDataFile],
      'application/x-executable' => [:triplrFile],
      'application/x-gzip' => [:triplrArchive],
      'application/zip' => [:triplrArchive],
      'application/vnd.oasis.opendocument.text' => [:triplrOpenDocument],
      'audio/mpeg' => [:triplrAudio],
      'audio/x-wav' => [:triplrAudio],
      'audio/3gpp' => [:triplrAudio],
      'image/bmp' => [:triplrImage],
      'image/gif' => [:triplrImage],
      'image/png' => [:triplrImage],
      'image/svg+xml' => [:triplrImage],
      'image/tiff' => [:triplrImage],
      'image/jpeg' => [:triplrImage],
      'inode/directory' => [:triplrContainer],
      'message/rfc822' => [:triplrMail],
      'text/cache-manifest' => [:triplrText],
      'text/calendar' => [:triplrCalendar],
      'text/chatlog' => [:triplrChatLog],
      'text/css' => [:triplrCode],
      'text/csv' => [:triplrCSV, /,/],
      'text/html' => [:triplrHTML],
      'text/man' => [:triplrMan],
      'text/xml+opml' => [:triplrOPML],
      'text/x-batch' => [:triplrBat],
      'text/x-c' => [:triplrCode],
      'text/x-asm' => [:triplrCode],
      'text/x-lisp' => [:triplrLisp],
      'text/x-docker' => [:triplrDocker],
      'text/ini' => [:triplrIni],
      'text/x-makefile' => [:triplrMakefile],
      'text/x-java-source' => [:triplrCode],
      'text/x-ruby' => [:triplrRuby],
      'text/x-php' => [:triplrCode],
      'text/x-python' => [:triplrCode],
      'text/x-script.ruby' => [:triplrCode],
      'text/x-script.python' => [:triplrCode],
      'text/x-shellscript' => [:triplrShellScript],
      'text/markdown' => [:triplrMarkdown],
      'text/nfo' => [:triplrText, 'cp437'],
      'text/plain' => [:triplrText],
      'text/restructured' => [:triplrCode],
      'text/rtf' => [:triplrRTF],
      'text/semicolon-separated-values' => [:triplrCSV, /;/],
      'text/tab-separated-values' => [:triplrCSV, /\t/],
      'text/uri-list' => [:triplrUriList],
      'text/based-uri-list' => [:triplrUriList, true],
      'text/x-tex' => [:triplrTeX]
    }

    # file -> MIME
    # Resolution order: directory, name-prefix table, name-suffix table, Rack's
    # suffix table, and finally content sniffing with file(1). Memoized in @mime.
    def mime
      return @mime if @mime

      name = path || ''
      prefix = ((File.basename name).split('.')[0] || '').downcase
      suffix = ((File.extname name)[1..-1] || '').downcase

      @mime =
        if node.directory? # container
          'inode/directory'
        elsif (byPrefix = MIMEprefix[prefix]) # prefix mapping
          byPrefix
        elsif (bySuffix = MIMEsuffix[suffix]) # suffix mapping
          bySuffix
        elsif (byRack = Rack::Mime::MIME_TYPES['.' + suffix]) # suffix mapping (Rack fallback)
          byRack
        else
          puts "#{localPath} unmapped MIME, sniffing content (SLOW)"
          `file --mime-type -b #{Shellwords.escape localPath.to_s}`.chomp
        end
    end

    # file -> boolean
    # true when the extension is one of the listed RDF formats
    def isRDF
      %w{atom n3 owl rdf ttl}.member?(ext)
    end

    # file -> RDF file
    # the file itself when it already is RDF, otherwise its transcoded counterpart
    def toRDF
      return self if isRDF
      transcode
    end
    # Convert this file to a cached JSON document of its triples, shaped as
    # {subject => {'uri' => subject, predicate => [objects]}}.
    # The cache path is derived from the sha2 of the file's inode number; the
    # triplr only runs when the cached document is missing or not newer than the file.
    def transcode
      return self if ext == 'e'

      digest = node.stat.ino.to_s.sha2
      doc = R['/.cache/' + digest[0..2] + '/' + digest[3..-1] + '.e']
      return doc if doc.e && doc.m > m # cache is up to date

      triplr = Triplr[mime]
      unless triplr
        puts "WARNING missing #{mime} triplr for #{uri}"
        triplr = :triplrFile
      end

      tree = {}
      send(*triplr) do |s, p, o|
        resource = (tree[s] ||= {'uri' => s})
        (resource[p] ||= []).push o
      end
      doc.writeFile tree.to_json
      doc
    end

    # file -> preview file
    # The preview is a sibling named ".<basename>.jpg"; when it does not exist yet it is
    # generated with ffmpegthumbnailer (video MIME types) or GraphicsMagick (anything else).
    def filePreview
      preview = join('.' + basename + '.jpg').R
      unless preview.e
        if mime.match(/^video/)
          `ffmpegthumbnailer -s 256 -i #{sh} -o #{preview.sh}`
        else
          `gm convert #{sh} -thumbnail "256x256" #{preview.sh}`
        end
      end
      # serve the preview when it exists, otherwise fall through to notfound
      served = preview.e && preview.entity(@r)
      served || notfound
    end

    # env -> MIMEs indexed on q-val
    # k - name of the request-environment field to parse
    # Returns {q-value (Float) => [media types]}, e.g. {1.0 => ['text/html'], 0.8 => ['text/turtle']}.
    # Only a parameter actually named "q" sets the q-value; other parameters
    # (charset, level, ...) are ignored, and empty list elements are skipped.
    def accept k = 'HTTP_ACCEPT'
      index = {}
      header = @r[k]
      return index unless header

      header.split(',').each{|entry| # (MIME, params...) tuples
        format, *params = entry.split(';').map(&:strip)
        next if format.nil? || format.empty? # stray comma
        qparam = params.find{|param| param =~ /\Aq\s*=/i}
        qval = qparam ? qparam.split('=', 2)[1].to_f : 1.0 # q or default
        (index[qval] ||= []).push format} # index q-val
      index
    end

    # env -> MIME
    # A 'feed' query argument forces Atom. Otherwise the accepted formats are walked
    # from highest to lowest q-value and the first one that is */* (-> default),
    # writable by an RDF::Writer, or one of Atom/HTML wins.
    def selectMIME default='text/html'
      return 'application/atom+xml' if q.has_key?('feed')

      builtin = %w{application/atom+xml text/html}
      accept.sort.reverse_each do |_qval, formats| # sorted index, highest qval first
        formats.each do |mime| # formats at q-value
          return default if mime == '*/*'
          return mime if RDF::Writer.for(:content_type => mime) || builtin.member?(mime) # terminate if serializable
        end
      end
      default
    end

  end
  # make the URI constants available, unqualified, inside Webize methods
  module Webize
    include URIs
  end
  # mix the MIME and Webize methods into WebResource
  include MIME
  include Webize
end
Feed.rb
# coding: utf-8
class WebResource

  module HTML
    # Markup for a blog post: a one-row table with a type cell linking to the
    # post, followed by a cell holding the post's key/value rendering.
    Markup[BlogPost] = lambda do |post, env|
      typeCell = {_: :td, class: :type, c: {_: :a, class: :newspaper, href: post.uri}}
      contentCell = {_: :td, class: :contents, c: HTML.kv(post, env)}
      {_: :table, class: :post,
       c: {_: :tr,
           c: [typeCell, contentCell]}}
    end
  end

  module Feed
    include URIs

    # Print the resolved hrefs of this document's <link rel=alternate> elements.
    def feeds
      alternates = nokogiri.css 'link[rel=alternate]'
      puts alternates.map { |link| join(link.attr(:href)) }
    end

    # Conditional GET of this feed. The ETag, modification time and body of the
    # previous fetch are kept under /.cache/<sha2 of URI>/; a body that differs
    # from the cached one is stored and handed to #indexFeed.
    # Best-effort: failures are printed, not raised. Only StandardError is rescued,
    # so Interrupt and SystemExit still stop a long-running fetch loop.
    def fetchFeed
      head = {} # request header
      cache = R['/.cache/'+uri.sha2+'/'] # storage
      etag = cache + 'etag'      # cache etag URI
      priorEtag = nil            # cache etag value
      mtime = cache + 'mtime'    # cache mtime URI
      priorMtime = nil           # cache mtime value
      body = cache + 'body.atom' # cache body URI
      if etag.e
        priorEtag = etag.readFile
        head["If-None-Match"] = priorEtag unless priorEtag.empty?
      elsif mtime.e
        priorMtime = mtime.readFile.to_time
        head["If-Modified-Since"] = priorMtime.httpdate
      end
      begin # conditional GET
        open(uri, head) do |response|
          curEtag = response.meta['etag']
          curMtime = response.last_modified || Time.now rescue Time.now
          etag.writeFile curEtag if curEtag && !curEtag.empty? && curEtag != priorEtag # new ETag value
          mtime.writeFile curMtime.iso8601 if curMtime != priorMtime # new Last-Modified value
          # TODO cache status for 301 moved-permanently link-maintenance. entire header to RDF?
          resp = response.read
          unless body.e && body.readFile == resp
            body.writeFile resp # new cached body
            ('file:'+body.localPath).R.indexFeed :format => :feed, :base_uri => uri # run indexer
          end
        end
      rescue OpenURI::HTTPError => error
        msg = error.message
        puts [uri,msg].join("\t") unless msg.match(/304/) # 304 Not Modified is the expected quiet case
      end
    rescue StandardError => e
      puts uri, e.class, e.message
    end
    # Fetch every feed listed, one URI per line, in the file at localPath.
    # Blank lines are skipped. Returns the Array of #fetchFeed results.
    def fetchFeeds
      lines = File.open(localPath){|list| list.readlines} # block form closes the handle
      lines.map(&:chomp).reject{|line| line.strip.empty?}.map(&:R).map(&:fetchFeed)
    end

    # #getFeed is a synonym for #fetchFeed
    alias_method :getFeed, :fetchFeed

    # Load this resource as an RDF repository and write every named graph that has a
    # timestamp to its own Turtle document, at a path built from the timestamp and a
    # slug of the graph name. Documents that already exist are left alone.
    # options - passed through to RDF::Repository.load
    # Returns self, or nil after a logged failure.
    # Best-effort: StandardError is rescued and printed, so Interrupt/SystemExit
    # propagate. NotImplementedError is rescued too; presumably RDF readers can raise
    # it for unsupported input (TODO confirm), and one feed should not stop the rest.
    def indexFeed options = {}
      # TODO alternate storage-locations (comments in post subdir, lkml in hourdir)
      g = RDF::Repository.load self, options
      g.each_graph.map{|graph|
        graph.query(RDF::Query::Pattern.new(:s,R[R::Date],:o)).first_value.do{|t| # find timestamp
          # '-', 'T' and the first ':' become path separators; a trailing zero offset or 'Z' is dropped
          time = t.gsub(/[-T]/,'/').sub(':','/').sub /(.00.00|Z)$/, ''
          # graph name without scheme, non-word runs collapsed to single dots, capped in length
          slug = (graph.name.to_s.sub(/https?:\/\//,'.').gsub(/[\W_]/,'..').sub(/\d{12,}/,'')+'.').gsub(/\.+/,'.')[0..127].sub(/\.$/,'')
          doc =  R["/#{time}#{slug}.ttl"]
          unless doc.e
            doc.dir.mkdir
            cacheBase = doc.stripDoc
            graph << RDF::Statement.new(graph.name, R[DC+'cache'], cacheBase)
            RDF::Writer.open(doc.localPath){|f|f << graph}
            puts cacheBase
          end
          true}}
      self
    rescue StandardError, NotImplementedError => e
      puts uri, e.class, e.message
    end

    # RDF format declaration for feeds: Atom content-type, .atom extension, read by Feed::Reader
    class Format < RDF::Format
      content_type('application/atom+xml', extension: :atom)
      content_encoding('utf-8')
      reader { WebResource::Feed::Reader }
    end

    class Reader < RDF::Reader
      include URIs
      format Format

      # input   - an IO-like object (anything responding to #read) or a string
      # options - :base_uri is the base for relative references (defaults to '/')
      # A block of arity 0 is instance_eval'd; any other block is called with the reader.
      def initialize(input = $stdin, options = {}, &block)
        source = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
        @doc = source.read.to_utf8
        @base = (options[:base_uri] || '/').R
        @host = @base.host
        if block_given?
          if block.arity == 0
            instance_eval(&block)
          else
            block.call(self)
          end
        end
        nil
      end

      # Call block with the (subject, predicate, object) of every statement.
      def each_triple(&block)
        each_statement do |statement|
          block.call(*statement.to_triple)
        end
      end

      # Call fn with an RDF::Statement for every triple coming out of the
      # rawTriples -> normalizePredicates -> normalizeDates -> scanContent pipeline.
      # Resource objects pass through; everything else becomes a literal (Content as
      # an XMLLiteral of stripped HTML, other values with their tags blanked out).
      # Each statement's graph is named after its subject.
      def each_statement &fn
        scanContent(:normalizeDates, :normalizePredicates,:rawTriples){|s,p,o|
          subject = s.R
          predicate = p.R
          object = if o.class == WebResource || o.class == RDF::URI
                     o # already a resource reference
                   else
                     isContent = p == Content
                     text = isContent ? R::HTML.strip(o) : o.gsub(/<[^>]*>/,' ')
                     literal = RDF::Literal(text)
                     literal.datatype = RDF.XMLLiteral if isContent
                     literal
                   end
          fn.call RDF::Statement.new(subject, predicate, object, :graph_name => s.R)}
      end

      # Run the triple source named by f. For Content triples with a String object,
      # additionally emit the links, images and videos found in the HTML, then the
      # content itself re-serialized as XHTML. All other triples pass through unchanged.
      def scanContent *f
        send(*f){|s,p,o|
          if p != Content || o.class != String
            yield s, p, o
          else
            subject = s.R
            # emit HTML links as RDF
            content = Nokogiri::HTML.fragment o

            # <a>
            content.css('a').each{|anchor|
              href = anchor.attr 'href'
              next unless href
              link = subject.join href
              target = link.R
              anchor.set_attribute 'href', link # href now holds the joined reference
              if %w{gif jpeg jpg png webp}.member? target.ext.downcase
                yield s, Image, target
              elsif (%w{mp4 webm}.member? target.ext.downcase) || (target.host && target.host.match(/(vimeo|youtu)/))
                yield s, Video, target
              elsif target != subject
                yield s, DC+'link', target
              end}

            # <img>
            content.css('img').each{|img|
              src = img.attr 'src'
              yield s, Image, (subject.join src) if src}

            # <iframe>
            content.css('iframe').each{|frame|
              src = frame.attr 'src'
              next unless src
              embed = src.R
              if embed.host && embed.host.match(/youtu/)
                id = embed.parts[-1] # last path part is used as the video id
                yield s, Video, R['https://www.youtube.com/watch?v='+id]
              end}

            # full HTML content
            yield s, p, content.to_xhtml
          end }
      end

      # Run the triple source named by f and rewrite feed-specific predicates to the
      # vocabulary used by the rest of the code; unmapped predicates pass through.
      def normalizePredicates *f
        # built once per call instead of once per triple
        canonical = {
          Atom+'content' => Content,
          Atom+'displaycategories' => Label,
          Atom+'enclosure' => SIOC+'attachment',
          Atom+'link' => DC+'link',
          Atom+'summary' => Abstract,
          Atom+'title' => Title,
          DCe+'subject' => Title,
          DCe+'type' => Type,
          Media+'title' => Title,
          Media+'description' => Abstract,
          Media+'community' => Content,
          Podcast+'author' => Creator,
          Podcast+'episodeType' => Label,
          Podcast+'keywords' => Label,
          Podcast+'title' => Title,
          Podcast+'subtitle' => Title,
          YouTube+'videoId' => Label,
          YouTube+'channelId' => SIOC+'user_agent',
          RSS+'category' => Label,
          RSS+'description' => Content,
          RSS+'encoded' => Content,
          RSS+'modules/content/encoded' => Content,
          RSS+'modules/slash/comments' => SIOC+'num_replies',
          RSS+'source' => DC+'source',
          RSS+'title' => Title,
        }
        send(*f){|s,p,o|
          yield s, canonical[p]||p, o }
      end

      # Run the triple source named by f and rewrite every date-like predicate to
      # Date with a UTC ISO 8601 value; other triples pass through unchanged.
      # A date that Time.parse cannot read is reported and its triple dropped, so
      # one malformed timestamp no longer aborts the whole feed.
      def normalizeDates *f
        # built once per call instead of once per triple
        dateTerms = {'CreationDate' => true,
                     'Date' => true,
                     RSS+'pubDate' => true,
                     Date => true,
                     DCe+'date' => true,
                     Atom+'published' => true,
                     Atom+'updated' => true}
        send(*f){|s,p,o|
          if dateTerms[p]
            # parse outside of the yield so that errors raised by the consumer are not caught here
            stamp = begin
                      Time.parse(o).utc.iso8601
                    rescue ArgumentError, TypeError
                      nil
                    end
            if stamp
              yield s, Date, stamp
            else
              puts "unparseable date #{o.inspect} on #{s}"
            end
          else
            yield s, p, o
          end}
      end

      # Scan @doc with regular expressions (no XML parser involved) and yield
      # (subject, predicate, object) triples for every <item>/<entry> that has a
      # usable identifier: a BlogPost typing, addressed-to links to the hosting
      # site(s), media/link references, creators, and one triple per child element.
      def rawTriples

        # identifiers
        reRDF = /about=["']?([^'">\s]+)/              # RDF @about
        reLink = /<link>([^<]+)/                      # <link> element
        reLinkCData = /<link><\!\[CDATA\[([^\]]+)/    # <link> CDATA block
        reLinkHref = /<link[^>]+rel=["']?alternate["']?[^>]+href=["']?([^'">\s]+)/ # <link> @href @rel=alternate
        reLinkRel = /<link[^>]+href=["']?([^'">\s]+)/ # <link> @href
        reId = /<(?:gu)?id[^>]*>([^<]+)/              # <id> element
        reURL = /\A(\/|http)[\S]+\Z/                  # HTTP URI

        # elements
        reHead = /<(rdf|rss|feed)([^>]+)/i
        reXMLns = /xmlns:?([a-z0-9]+)?=["']?([^'">\s]+)/
        reItem = %r{<(?<ns>rss:|atom:)?(?<tag>item|entry)(?<attrs>[\s][^>]*)?>(?<inner>.*?)</\k<ns>?\k<tag>>}mi
        reElement = %r{<([a-z0-9]+:)?([a-z]+)([\s][^>]*)?>(.*?)</\1?\2>}mi
        reGroup = /<\/?media:group>/i
        reMedia = %r{<(link|enclosure|media)([^>]+)>}mi
        reSrc = /(href|url|src)=['"]?([^'">\s]+)/
        reRel = /rel=['"]?([^'">\s]+)/

        # XML name-space
        # x maps a namespace prefix (nil for the default namespace) to its base URI,
        # with '#' appended when the URI does not already end in '/' or '#'
        x = {}
        head = @doc.match(reHead)
        head && head[2] && head[2].scan(reXMLns){|m|
          prefix = m[0]
          base = m[1]
          base = base + '#' unless %w{/ #}.member? base [-1]
          x[prefix] = base}

        # scan items
        # captures arrive in the order of reItem's named groups: ns, tag, attrs, inner
        @doc.scan(reItem){|m|
          attrs = m[2]
          inner = m[3]
          # identifier search. prefer already RDF with lots of fallbacks
          u = (attrs.do{|a|a.match(reRDF)} || inner.match(reLink) || inner.match(reLinkCData) || inner.match(reLinkHref) || inner.match(reLinkRel) || inner.match(reId)).do{|s|s[1]}
          if u # identifier match
            u = @base.join(u).to_s unless u.match /^http/
            resource = u.R

            yield u, Type, R[BlogPost]
            blogs = [resource.join('/')]
            blogs.push @base.join('/') if @host && @host != resource.host # re-blog at another host
            blogs.map{|blog|
              yield u, R::To, blog}

            # <link>/<enclosure>/<media> tags that carry a href/url/src attribute
            inner.scan(reMedia){|e|
              e[1].match(reSrc).do{|url|
                rel = e[1].match reRel
                rel = rel ? rel[1] : 'link'
                o = (@base.join url[2]).R
                # TODO Cache media
                p = case o.ext.downcase
                    when 'jpg'
                      R::Image
                    when 'jpeg'
                      R::Image
                    when 'png'
                      R::Image
                    else
                      R::Atom + rel
                    end
                yield u,p,o unless resource == o}}

            # child elements; e is [namespace prefix with ':', tag name, attributes, inner text]
            inner.gsub(reGroup,'').scan(reElement){|e|
              p = (x[e[0] && e[0].chop]||R::RSS) + e[1] # namespaced attribute-names
              if [Atom+'id', RSS+'link', RSS+'guid', Atom+'link'].member? p
               # subject URI candidates above
              elsif [Atom+'author', RSS+'author', RSS+'creator', DCe+'creator'].member? p
                crs = [] # creators
                uri = e[3].match /<uri>([^<]+)</
                crs.push uri[1].R if uri
                name = e[3].match /<name>([^<]+)</
                crs.push name[1] if name
                unless name || uri
                  crs.push e[3].do{|o|
                    o.match(reURL) ? o.R : o }
                end
                crs.map{|cr|
                  yield u, Creator, cr
                }
              else # basic element
                # CDATA is unwrapped, markup is kept as-is, plain text is entity-unescaped;
                # a value that looks like a path or HTTP URI is then cast to a resource
                yield u,p,e[3].do{|o|
                  case o
                  when /^\s*<\!\[CDATA/m
                    o.sub /^\s*<\!\[CDATA\[(.*?)\]\]>\s*$/m,'\1'
                  when /</m
                    o
                  else
                    CGI.unescapeHTML o
                  end
                }.do{|o|o.match(/\A(\/|http)[\S]+\Z/) ? o.R : o }
              end
            }
          end}
      end
    end

    # Render a graph ({subject URI => {predicate => [objects]}}) as an Atom feed
    # whose id, title and self-link are this resource's URI.
    def renderFeed graph
      feedHead = [{_: :id, c: uri},
                  {_: :title, c: uri},
                  {_: :link, rel: :self, href: uri},
                  {_: :updated, c: Time.now.iso8601}]

      entries = graph.map do |u, d|
        dates, titles, creators = d[Date], d[Title], d[Creator]
        {_: :entry,
         c: [{_: :id, c: u}, {_: :link, href: u},
             dates && {_: :updated, c: dates[0]},
             titles && {_: :title, c: titles},
             creators && {_: :author, c: creators[0]},
             {_: :content, type: :xhtml,
              c: {xmlns:"http://www.w3.org/1999/xhtml",
                  c: d[Content]}}]}
      end

      HTML.render ['<?xml version="1.0" encoding="utf-8"?>',
                   {_: :feed,xmlns: 'http://www.w3.org/2005/Atom',
                    c: feedHead + [entries]}]
    end
  end
  include Feed
  module Webize
    # Emit triples for an OPML subscription list: a DC List typing and title for
    # the document, then a SIOC Feed typing for each <outline type="rss"> that
    # carries a feed URL. Outlines without one are skipped rather than yielded
    # with a nil subject.
    def triplrOPML
      # doc
      base = stripDoc
      yield base.uri, Type, R[DC+'List']
      yield base.uri, Title, basename
      # feeds
      Nokogiri::HTML.fragment(readFile).css('outline[type="rss"]').each{|outline|
        feed = outline.attr 'xmlurl' # attribute name as looked up in the HTML-parsed fragment
        next if feed.nil? || feed.empty?
        yield feed, Type, R[SIOC+'Feed']}
    end
  end
end
Text.rb
# coding: utf-8
class WebResource
  module Webize

    # Archive file: type triple followed by generic file metadata.
    def triplrArchive &f
      yield uri, Type, R[Stat+'Archive']
      triplrFile(&f)
    end

    # Audio file: Sound type triple followed by generic file metadata.
    def triplrAudio &f
      yield uri, Type, R[Sound]
      triplrFile(&f)
    end

    # Data file: type triple followed by generic file metadata.
    def triplrDataFile &f
      yield uri, Type, R[Stat+'DataFile']
      triplrFile(&f)
    end

    # Shared body of the source-code triplrs below.
    # Yields the SIOC SourceCode type and the pygmentize HTML rendering of the file.
    # lexer: pygmentize lexer name, or nil to let pygmentize pick one itself
    def pygmentizeTriples lexer = nil
      yield uri, Type, R[SIOC+'SourceCode']
      yield uri, Content, (lexer ? `pygmentize -l #{lexer} -f html #{sh}` : `pygmentize -f html #{sh}`)
    end
    private :pygmentizeTriples

    # one triplr per explicitly-selected lexer
    def triplrBat         &f; pygmentizeTriples 'batch',  &f end
    def triplrDocker      &f; pygmentizeTriples 'docker', &f end
    def triplrIni         &f; pygmentizeTriples 'ini',    &f end
    def triplrMakefile    &f; pygmentizeTriples 'make',   &f end
    def triplrLisp        &f; pygmentizeTriples 'lisp',   &f end
    def triplrShellScript &f; pygmentizeTriples 'sh',     &f end
    def triplrRuby        &f; pygmentizeTriples 'ruby',   &f end
    def triplrCode        &f; pygmentizeTriples(&f) end # generic, pygments determine file-type

    # Emit triples for a word-processor document converted to plain text.
    # conv: name of the converter command, run as "<conv> <file> <argB>"
    # argB: optional trailing argument for the converter
    # Yields the WordDocument type, the converted text wrapped in <pre>,
    # then the generic file metadata.
    def triplrWord conv, argB='', &f
      yield uri, Type, R[Stat+'WordDocument']
      text = `#{conv} #{sh} #{argB}`
      # converter output is plain text: escape it so markup characters in the
      # document cannot become live HTML inside the <pre> element
      escaped = text.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
      yield uri, Content, '<pre>' + escaped + '</pre>'
      triplrFile &f
    end

    # RTF via catdoc
    def triplrRTF &f
      triplrWord(:catdoc, &f)
    end

    # legacy Word documents via antiword
    def triplrWordDoc &f
      triplrWord(:antiword, &f)
    end

    # Word XML via docx2txt, with '-' as the trailing argument
    def triplrWordXML &f
      triplrWord(:docx2txt, '-', &f)
    end

    # OpenDocument text via odt2txt
    def triplrOpenDocument &f
      triplrWord(:odt2txt, &f)
    end

    # Emit triples for a plain-text file.
    # enc: optional source encoding; when given the content is re-tagged with it
    #      and converted with to_utf8
    # Yields type, title, date, a hasFormat pointer to this resource, and the
    # text rendered as a <pre> element with links marked up by String#hrefs.
    # Any exception is reported on stdout instead of propagating.
    def triplrText enc=nil, &f
      base = stripDoc
      doc = base.uri
      yield doc, Type, R[Stat+'TextFile']
      yield doc, Title, base.basename
      mtime.do{|modified| yield doc, Date, modified.iso8601}
      yield doc, DC+'hasFormat', self
      text = readFile.do{|raw| enc ? raw.force_encoding(enc).to_utf8 : raw}
      pre = {_: :pre, style: 'white-space: pre-wrap', c: text.hrefs}
      yield doc, Content, HTML.render(pre)
    rescue Exception => e
      puts uri, e.class, e.message
    end
    
    # TeX source rendered to HTML by the external tth tool, emitted as the
    # content of the suffix-stripped document URI.
    def triplrTeX
      subject = stripDoc.uri
      html = `cat #{sh} | tth -r`
      yield subject, Content, html
    end

    # Emit triples for a Markdown file: type, title, rendered HTML and date.
    # A document whose basename is README is emitted as an Abstract, anything
    # else as Content.
    def triplrMarkdown
      base = stripDoc
      doc = base.uri
      name = base.basename
      attr = name == 'README' ? Abstract : Content
      yield doc, Type, R[Stat+'MarkdownFile']
      yield doc, Title, name
      renderer = ::Redcarpet::Markdown.new(::Redcarpet::Render::Pygment, fenced_code_blocks: true)
      yield doc, attr, renderer.render(readFile)
      mtime.do{|modified| yield doc, Date, modified.iso8601}
    end

    # Emit triples for a CSV file whose first line is a header row.
    # d: unused here; kept so existing callers can keep passing it
    # Yields:
    #  - the table: its type and rowCount (number of lines read, header included)
    #  - each data row, identified as <uri>#row:<index>: its type once, then one
    #    triple per cell with the header cell of the same column as predicate
    def triplrCSV d
      ns = W3 + 'ns/csv#'
      lines = CSV.read localPath
      header = lines[0]
      return unless header # empty file: nothing to describe

      yield uri, Type, R[ns+'Table']
      yield uri, ns+'rowCount', lines.size
      lines[1..-1].each_with_index{|row, line|
        next if row.empty?
        id = uri + '#row:' + line.to_s
        yield id, Type, R[ns+'Row'] # once per row rather than once per cell
        row.each_with_index{|field, i|
          # a cell beyond the header, or under an empty header cell, has no predicate
          yield id, header[i], field if header[i]}}
    end
  end
end

class String
  # hex SHA2 digest of the string
  def sha2; Digest::SHA2.hexdigest self end
  # transcode to UTF-8, substituting '?' for undefined or invalid sequences
  def to_utf8; encode('UTF-8', undef: :replace, invalid: :replace, replace: '?') end
  # re-tag the bytes as UTF-8 without transcoding
  def utf8; force_encoding 'UTF-8' end
  # shell-escaped copy of the string
  def sh; Shellwords.escape self end

  # text -> HTML & yielded (rel,href) tuples
  #
  # Escapes &, < and > in the text and replaces every http(s) URL with an
  # empty <a class="link"> element pointing at it. When a block is given it is
  # called once per URL, in document order, with (type, resource), where type
  # is R::Image, R::Video or R::Link depending on what the URL looks like.
  # Returns the HTML string.
  def hrefs &blk
    escape = -> text { text.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') }
                                     # <> and () wrapping stripped, trailing [,.] dropped
    pattern = /(https?:\/\/(\([^)>\s]*\)|[,.]\S|[^\s),.”\'\"<>\]])+)/
    html = ''.dup
    pos = 0
    # scan forward from the end of the previous match: depth stays constant
    # no matter how many links the text contains
    while (m = pattern.match(self, pos))
      link = m[0]
      html << escape[self[pos...m.begin(0)]] # text before the link
      # the pattern can capture a double quote (via the [,.]\S alternative), so
      # it has to be escaped too or it would terminate the href attribute early
      html << '<a class="link" href="' << escape[link].gsub('"','&quot;') << '"></a>'
      if blk # TODO resolve shortened link in background task
        type = case link
               when /(gif|jpg|jpeg|jpg:large|png|webp)$/i
                 R::Image
               when /(youtube.com|(mkv|mp4|webm)$)/i
                 R::Video
               else
                 R::Link
               end
        yield type, link.R # the resource is only built when somebody wants it
      end
      pos = m.end(0)
    end
    html << escape[self[pos..-1]] # text after the last link
  end
end
HTTP.rb
# coding: utf-8
class WebResource
  module HTTP
    include URIs
    Host = {}

    # Rack HTTP-call entry-point
    # Rack HTTP-call entry-point.
    # env: the Rack environment Hash; gains a 'q' key holding the parsed query
    # Returns a [status, headers, body] triple:
    #  - 405 for anything but HEAD and GET
    #  - whatever the resource's HEAD/GET method returns
    #  - 500 with the exception class, message and backtrace if anything raises
    def self.call env
      return [405,{},[]] unless %w{HEAD GET}.member? env['REQUEST_METHOD']
      rawpath = env['REQUEST_PATH'].utf8.gsub(/[\/]+/, '/')  # collapse repeated slashes
      path = Pathname.new(rawpath).expand_path.to_s        # evaluate path
      path += '/' if path[-1] != '/' && rawpath[-1] == '/' # preserve trailing-slash
      env['q'] = parseQs env['QUERY_STRING']               # parse query
      # every header is optional here: a request without Host must not fail in the log line
      puts (env['HTTP_HOST']||'') + " " + (env['HTTP_REFERER']||'') + " " + (env['HTTP_USER_AGENT']||'') + " "
      path.R.environment(env).send env['REQUEST_METHOD']   # resource object
    rescue Exception => x
      [500,{'Content-Type'=>'text/plain'},[[x.class,x.message,x.backtrace].join("\n")]]
    end

    # Parse a raw query string into a Hash.
    # Keys are URL-decoded and lower-cased, values URL-decoded; a pair without
    # '=' maps to nil. A nil query string gives an empty Hash.
    def self.parseQs qs
      return {} unless qs
      qs.split(/&/).each_with_object({}) do |pair, params|
        key, value = pair.split(/=/, 2).map { |part| CGI.unescape part }
        params[(key || '').downcase] = value
      end
    end

    # Request-environment accessor: with an argument, stores it and returns
    # self; without one, returns the stored environment.
    def environment env = nil # set (arg) or get
      return @r unless env
      @r = env
      self
    end
    alias_method :env, :environment

    # HEAD: status and headers of the GET response, with an empty body.
    def HEAD
      self.GET.do{|status, headers, _body|
        [status, headers, []]}
    end

    # Handle a GET request for this resource.
    # Resets the per-request response headers and link table in the environment,
    # tries the bespoke handlers in order (host mapping, date-dir redirect,
    # directory redirect, static file), then falls back to the default handler:
    # prev/next/up pointers, node selection, MIME selection and a conditional
    # entity response.
    # Returns a [status, headers, body] triple.
    def GET
      @r[:Response] = {}
      @r[:links] = {}
      parts = path[1..-1].split '/'

      ## bespoke GET handlers

      # host-specific mapping
      return Host[@r['HTTP_HOST']][self] if Host[@r['HTTP_HOST']]

      # dynamic date-dir redirect
      return (chronoDir parts) if (parts[0] || '').match(/^(y(ear)?|m(onth)?|d(ay)?|h(our)?)$/i)

      # (fs) directory requested and exists
      return [302,{'Location' => path + '/' + qs},[]] if node.directory? && path[-1] != '/'

      # (fs) file requested and exists
      return fileResponse if node.file?

      ## default GET handler

      # time-slice page pointers
      dp = [] # date parts
      dp.push parts.shift.to_i while parts[0] && parts[0].match(/^[0-9]+$/)
      n = nil; p = nil # next / prev pointer
      case dp.length
      when 1 # Y
        year = dp[0]
        n = '/' + (year + 1).to_s
        p = '/' + (year - 1).to_s
      when 2 # Y-m
        year = dp[0]
        m = dp[1]
        # month directories are two digits wide. The wrap-around cases are
        # spelled as text: an interpolated 01 is an octal integer literal and
        # would render as "1"
        n = m >= 12 ? "/#{year + 1}/01" : "/#{year}/#{'%02d' % (m + 1)}"
        p = m <=  1 ? "/#{year - 1}/12" : "/#{year}/#{'%02d' % (m - 1)}"
      when 3 # Y-m-d
        day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil # unparseable date: no pointers
        if day
          p = (day-1).strftime('/%Y/%m/%d')
          n = (day+1).strftime('/%Y/%m/%d')
        end
      when 4 # Y-m-d-H
        day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil
        if day
          hour = dp[3]
          # hour 0 and hour 23 roll over into the neighbouring day
          p = hour <=  0 ? (day - 1).strftime('/%Y/%m/%d/23') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour-1)))
          n = hour >= 23 ? (day + 1).strftime('/%Y/%m/%d/00') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour+1)))
        end
      end
      sl = parts.empty? ? '' : (path[-1] == '/' ? '/' : '') # trailing slash
      # a pointer is only advertised when its target exists
      @r[:links][:prev] = p + '/' + parts.join('/') + sl + qs + '#prev' if p && R[p].e
      @r[:links][:next] = n + '/' + parts.join('/') + sl + qs + '#next' if n && R[n].e
      @r[:links][:up] = dirname + (dirname == '/' ? '' : '/') + qs + '#r' + path.sha2 unless path=='/'

      # resource set
      set = selectNodes
      return notfound if !set || set.empty?
      format = selectMIME

      # response header
      @r[:Response].update({'Link' => @r[:links].map{|type,uri|"<#{uri}>; rel=#{type}"}.intersperse(', ').join}) unless @r[:links].empty?
      @r[:Response].update({'Content-Type' => %w{text/html text/turtle}.member?(format) ? (format+'; charset=utf-8') : format,
                            'ETag' => [set.sort.map{|r|[r,r.m]}, format].join.sha2,
                           })

      # conditional response
      entity @r, ->{
        if set.size == 1 && set[0].mime == format
          set[0] # static file good to go
        else # transcode and/or merge sources
          if format == 'text/html'
            htmlDocument load set
          elsif format == 'application/atom+xml'
            renderFeed load set
          else # RDF
            g = RDF::Graph.new
            set.map{|n| g.load n.toRDF.localPath, :base_uri => n.stripDoc }
            g.dump (RDF::Writer.for :content_type => format).to_sym, :base_uri => self, :standard_prefixes => true
          end
        end}
    end

    # 404 response whose body is the HTML document rendered for an empty graph.
    def notfound
      body = htmlDocument
      [404, {'Content-Type' => 'text/html'}, [body]]
    end

    # querystring -> Hash
    # Query arguments as a Hash: taken from the request environment by default,
    # or parsed from this resource's own query component when fromEnv is false.
    def q fromEnv = true
      return @r['q'] if fromEnv
      HTTP.parseQs(query)
    end

    # true when this resource's path is the path of the current request
    def inDoc
      path == @r['REQUEST_PATH']
    end

    # env -> ?querystring
    # The request's query string with a leading '?', or '' when it is absent or empty.
    def qs
      raw = @r['QUERY_STRING']
      if raw && !raw.empty?
        '?' + raw
      else
        ''
      end
    end

    # Hash -> ?querystring
    # Serialize a Hash as a '?'-prefixed query string.
    # Only the first element of an Array value is used; it is URL-escaped.
    # A nil or false value yields a bare "key=".
    def HTTP.qs h
      pairs = h.map{|k, v|
        value = v ? (CGI.escape [*v][0].to_s) : ''
        k.to_s + '=' + value}
      '?' + pairs.intersperse("&").join('')
    end

  end
  include HTTP
end
Icons.rb
class WebResource
  module HTML
    # Lookup table from a predicate/type URI (or the literal key 'uri') to a
    # Symbol. Within this file the Symbol is used as a CSS class name by
    # Markup[Type] and HTML.kv, which render the cell with empty text when a
    # mapping exists.
    Icons = {
      'uri' => :id,
      Comments => :comments,
      Contains => :bin,
      Container => :dir,
      Content => :pencil,
      Abstract => :quote,
      DC+'identifier' => :barcode,
      DC+'cache' => :chain,
      DC+'hasFormat' => :file,
      DC+'link' => :chain,
      DC+'List' => :list,
      Video => :video,
      Date => :date,
      Image => :img,
      Label => :tag,
      Mtime => :time,
      RSS+'comments' => :comments,
      InstantMessage => :comment,
      BlogPost => :pencil,
      SIOC+'ChatLog' => :comments,
      SIOC+'Discussion' => :comments,
      SIOC+'Feed' => :feed,
      SIOC+'MailMessage' => :envelope,
      SIOC+'MicroblogPost' => :newspaper,
      SIOC+'Post' => :newspaper,
      SIOC+'SourceCode' => :code,
      SIOC+'reply_of' => :reply,
      SIOC+'Thread' => :openenvelope,
      SIOC+'Tweet' => :bird,
      SIOC+'Usergroup' => :group,
      SIOC+'WikiArticle' => :pencil,
      SIOC+'has_creator' => :user,
      SIOC+'num_replies' => :comments,
      SIOC+'has_discussion' => :comments,
      SIOC+'user_agent' => :mailer,
      Schema+'Person' => :user,
      Schema+'location' => :location,
      Size => :size,
      Sound => :speaker,
      Stat+'Archive' => :archive,
      Stat+'DataFile' => :tree,
      Stat+'File' => :file,
      Stat+'HTMLFile' => :html,
      Stat+'MarkdownFile' => :markup,
      Stat+'TextFile' => :textfile,
      Stat+'WordDocument' => :word,
      Stat+'container' => :dir,
      Stat+'contains' => :dir,
      Stat+'height' => :height,
      Stat+'width' => :width,
      Title => :title,
      To => :userB,
      Type => :type,
      W3+'2000/01/rdf-schema#Resource' => :node,
    }
  end
end
HTML.rb
# coding: utf-8
class WebResource
  module HTML
    include URIs

    # Registries of lambdas, filled in across this file:
    #  Contain: name => (graph -> tree Hash), selected in htmlDocument
    #  Markup:  predicate/type URI => (value, env -> markup), used by HTML.value
    Contain = {}
    Markup = {}

    # contain year-containers at root in additional decade-container
    # NOTE(review): as written the graph argument is never read. `decades` and
    # `other` stay empty, so the result is a root container with no contents.
    # Looks like the grouping step is missing -- confirm intent.
    Contain['decades'] = -> graph {
      decades = {}
      other = []

      {'uri' => '/', Type => [R[Container]],
       Contains => (decades.values.concat other)}}

    # markup RDF type-tag
    # markup RDF type-tag
    # A value responding to #uri becomes a link classed with its icon (empty
    # text when an icon is mapped, else the URI fragment or basename); any other
    # value is rendered as escaped text.
    Markup[Type] = -> t,env=nil {
      next CGI.escapeHTML(t.to_s) unless t.respond_to? :uri
      type = t.R
      icon = Icons[type.uri]
      label = icon ? '' : (type.fragment||type.basename)
      {_: :a, href: type.uri, c: label, class: icon}}

    # date link: the first 14 characters of the date string, with '-', 'T' and
    # ':' turned into '/', form the target path
    Markup[Date] = -> date,env=nil {
      target = '/' + date[0..13].gsub(/[-T:]/,'/')
      {_: :a, class: :date, href: target, c: date}}

    # Markup -> String
    # Markup -> String
    #  String      emitted as-is (already markup)
    #  Hash        an element: :_ is the tag name (default div), :c the children,
    #              every other key an attribute
    #  Array       each member rendered and concatenated
    #  R           rendered as a link to the resource
    #  nil/false   empty string
    #  other       escaped text
    def self.render x
      case x
      when String
        x
      when Hash # HTML element
        tag = (x[:_] || 'div').to_s
        void = [:img, :input, :link, :meta].member? x[:_]
        escapes = {"'"=>'%27', '>'=>'%3E', '<'=>'%3C'}
        # attributes are single-quoted, so quote and angle brackets are percent-encoded
        attributes = (x.keys - [:_,:c]).map{|name|
          value = x[name].to_s.chars.map{|char| escapes[char] || char}.join
          ' ' + name.to_s + '=' + "'" + value + "'"}.join
        opening = '<' + tag + attributes + (void ? '/' : '') + '>'
        closing = void ? '' : ('</' + tag + '>')
        opening + (render x[:c]) + closing
      when Array
        x.map{|member| render member}.join
      when R
        render({_: :a, href: x.uri, id: 'link'+rand.to_s.sha2, c: x[:label][0] || (CGI.escapeHTML x.uri)})
      when NilClass
        ''
      when FalseClass
        ''
      else
        CGI.escapeHTML x.to_s
      end
    end

    # (p,[o]) -> Markup
    # (p,[o]) -> Markup
    # Renders each value of predicate k, separated by spaces. Precedence:
    # a Markup lambda registered for k, then Hash values dispatched on their
    # types, then pass-through cases, then escaped text.
    def self.value k, vs, env
      rendered = vs.justArray.map do |v|
        next Markup[k][v,env] if Markup[k]

        if v.class == Hash
          resource = v.R
          types = resource.types
          next Markup[InstantMessage][resource,env] if types.member? InstantMessage
          next Markup[Container][v,env] if types.member? Container
          next Markup[BlogPost][v,env] if types.member? BlogPost
          next kv(v,env)
        end

        # resources and already-rendered content pass through untouched
        next v if v.class == WebResource || k == Content || k == Abstract

        if k == 'uri'
          u = v.R
          {_: :a, href: u.uri, id: 'link'+rand.to_s.sha2, c: "#{u.host} #{u.path} #{u.fragment}"}
        else
          CGI.escapeHTML v.to_s
        end
      end
      rendered.intersperse ' '
    end

    # [rsrc,..] -> Markup
    # [rsrc,..] -> Markup
    # One table row per resource, ordered by first Date value, newest first
    # (resources without a date sort as ''). One cell per entry of ks.
    def self.tabular resources, env
      # columns: [predicate, optional CSS class for the cell]
      ks = [[From, :from],
            [To,   :to],
            ['uri'],
            [Type],
            [Title,:title],
            [Abstract],
            [Date]]
      {_: :table, c: resources.sort_by{|r|r[Date].justArray[0] || ''}.reverse.map{|r|
         {_: :tr, c: ks.map{|k|
            # the title cell also carries the resource's images and videos
            keys = k[0]==Title ? [Title,Image,Video] : [k[0]]
            {_: :td, class: k[1],
             c: keys.map{|key|
               r[key].justArray.map{|v|
                 HTML.value key,v,env }.intersperse(' ')}}}}}}
    end

    # { k => v } -> Markup
    # { k => v } -> Markup
    # Two-column key/value table for one resource. A Contains entry instead
    # spans both columns and holds the rendered children.
    # env[:colors] must already be a Hash: it caches the style computed per key.
    def self.kv hash, env
      {_: :table, class: :kv, c: hash.map{|k,vs|
         # the Content row is dropped when the query has an 'h' argument
         hide = k == Content && env['q'] && env['q'].has_key?('h')
         style = env[:colors][k] ||= HTML.colorize(k)
         {_: :tr,
          c: (if k == Contains
              {_: :td, colspan: 2, c: vs.justArray.map{|v| HTML.value k,v,env }}
             else
               # key cell: icon class with empty text when mapped, else the key itself
               [{_: :td, class: :k, style: style,
                 c: {_: :span, class: Icons[k] || :label, c: Icons[k] ? '' : k}},
                {_: :td, class: :v, style: style,
                 c: ["\n ",
                     vs.justArray.map{|v| HTML.value k,v,env }.intersperse(' ')]}]
              end)} unless hide}}
    end

    # Render a graph as a complete HTML document.
    # graph: subject-URI => resource Hash; an empty graph produces the 404 view
    # Reads the request environment (@r) for query arguments and document-level
    # links, and initialises the :links, :images and :colors tables when missing.
    # Returns the document as a String.
    def htmlDocument graph = {}
      @r ||= {} # env
      # URI.unescape no longer exists as of Ruby 3.0; the default parser's
      # unescape is the routine it used to delegate to
      title = graph[path+'#this'].do{|r| r[Title].justArray[0]} || # explicit title
              [*path.split('/'),q['q'] ,q['f']].map{|e|e && URI::DEFAULT_PARSER.unescape(e)}.join(' ') # pathname
      @r[:links] ||= {} # document-level links
      @r[:images] ||= {}  # image references
      @r[:colors] ||= {}  # per-key style cache used by HTML.kv
      htmlGrep graph, q['q'] if q['q']
      css = -> s {{_: :style, c: ["\n", ".conf/#{s}.css".R.readFile]}}
      cssFiles = [:icons]
      cssFiles.push :code if graph.values.find{|r|r.R.a SIOC+'SourceCode'}
      # navigation pointer, rendered only when the named link is present
      link = -> name,label {
        @r[:links][name].do{|uri| [{_: :span, style: "font-size: 2.4em", c: uri.R.data({id: name, label: label})}, "\n"]}}
      nodata = graph.empty?
      # output
      HTML.render ["<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n\n",
                   {_: :html, xmlns: "http://www.w3.org/1999/xhtml",
                    c: ["\n\n",
                        {_: :head,
                         c: [{_: :meta, charset: 'utf-8'},
                             {_: :title, c: title},
                             {_: :link, rel: :icon, href: '/.conf/icon.png'},
                             *@r[:links].do{|links| links.map{|type,uri|
                                 {_: :link, rel: type, href: CGI.escapeHTML(uri.to_s)}}},
                             css['site']].map{|e|['  ',e,"\n"]}}, "\n\n",
                        {_: :body,
                         c: ["\n", link[:up, '&nbsp;&nbsp;&#9650;'], '<br>',
                             link[:prev, '&#9664;'],
                             (if q.has_key? 't'
                              HTML.tabular graph.values, @r # tabular view
                             elsif nodata
                               [{_: :h1, c: 404}, HTML.kv(@r,@r)] # 404
                             else # graph -> tree -> markup
                               HTML.value Container, [(Contain[q['c']] || Contain[path == '/' ? 'decades' : 'tree'])[graph]], @r
                              end),
                             link[:next, '&#9654;'], '<br>',
                             link[:down,'&#9660;'],
                             cssFiles.map{|f|css[f]}, "\n",
                             {_: :script, c: ["\n", '.conf/site.js'.R.readFile]}, "\n",
                            ]}, "\n" ]}]
    end

    # Open this resource's URI, read it and parse the result as HTML.
    def nokogiri
      markup = open(uri).read
      Nokogiri::HTML.parse markup
    end

    # Filter a graph down to resources matching a query and highlight the hits.
    # graph: subject-URI => resource Hash; modified in place
    # q: query string, split into terms that are OR-ed into one
    #    case-insensitive pattern
    # Effects on graph:
    #  - resources that have an Abstract or Content but do not match are removed
    #  - matching resources get an Abstract of up to six matching lines, tags
    #    stripped, each hit wrapped in a span classed w<term index>
    #  - a '#abstracts' entry carries the CSS giving each term index a random colour
    def htmlGrep graph, q
      wordIndex = {}
      args = POSIX.splitArgs q
      # hits are looked up lower-cased below, so index the terms lower-cased as well;
      # otherwise a term typed with capitals never finds its index
      args.each_with_index{|arg,i| wordIndex[arg.downcase] = i }
      pattern = /(#{args.join '|'})/i

      # find matches
      graph.delete_if{|u,r|
        (r.has_key?(Abstract)||r.has_key?(Content)) && !r.to_s.match(pattern)}

      # highlight matches
      graph.values.map{|r|
        lines = (r[Content]||r[Abstract]).justArray.map(&:lines).flatten.grep(pattern)
        next if lines.empty?
        r[Abstract] = lines[0..5].map{|l|
          l.gsub(/<[^>]+>/,'')[0..512].gsub(pattern){|g| # capture match
            HTML.render({_: :span, class: "w#{wordIndex[g.downcase]}", c: g}) # wrap match
          }}}

      # highlighting CSS
      graph['#abstracts'] = {Abstract => HTML.render({_: :style, c: wordIndex.values.map{|i|
                                                        ".w#{i} {background-color: #{'#%06x' % (rand 16777216)}; color: white}\n"}})}
    end

    # CSS style string for a key cell:
    #  empty key               -> no style
    #  base metadata           -> grey background
    #  content & title keys    -> black background
    #  anything else           -> random background colour
    def self.colorize k
      return '' if k.empty?

      base = [Date, Type, To, From, DC+'cache', Size] # base metadata
      return "background-color: #ddd; color: #000" if base.member? k

      primary = [Contains, Content, Title, Link, Image, Video, 'status', 'uri'] # content & title
      return "background-color: #000; color: #fff" if primary.member? k

      # oddball metadata, colorize it
      "background-color: #{'#%06x' % (rand 16777216)}; color: #000"
    end

    # Sanitize an HTML fragment.
    # body: markup to parse as a fragment
    # loseTags: element names removed with their contents (nil/false: keep all)
    # keepAttr: attribute names to keep, all others are unlinked (nil/false: keep all)
    # Returns the fragment serialized as XHTML without indentation.
    def self.strip body, loseTags=%w{iframe script style}, keepAttr=%w{alt href id name rel src title type}
      html = Nokogiri::HTML.fragment body
      if loseTags
        loseTags.map{|tag| html.css(tag).remove}
      end
      if keepAttr
        html.traverse do |element|
          element.attribute_nodes.map do |attribute|
            attribute.unlink unless keepAttr.member? attribute.name
          end
        end
      end
      html.to_xhtml(:indent => 0)
    end

  end
  include HTML
  module Webize
    # Emit triples for an HTML file: generic file metadata, the HTMLFile type,
    # one Title per <title> element and one Image per og:image <meta> element.
    def triplrHTML &f
      triplrFile(&f)
      yield uri, Type, R[Stat+'HTMLFile']
      document = Nokogiri::HTML.parse readFile
      document.css('title').map do |title|
        yield uri, Title, title.inner_text
      end
      document.css('meta[property="og:image"]').map do |meta|
        yield uri, Image, meta.attr("content").R
      end
    end
  end
end

module Redcarpet
  module Render
    # HTML renderer that highlights fenced code blocks through pygmentize.
    class Pygment < HTML
      # Render one code block.
      # code: the block's source text
      # lang: the language given on the fence, or nil when there is none
      # Returns HTML: pygmentize output for the given language, otherwise the
      # escaped source in a <pre><code> wrapper.
      def block_code(code, lang)
        if lang
          IO.popen("pygmentize -l #{lang.downcase.sh} -f html",'r+'){|p|
            p.puts code
            p.close_write
            p.read
          }
        else
          # the return value is inserted into the document as markup, so raw
          # source would have its <, > and & interpreted and lose its layout
          '<pre><code>' + ::CGI.escapeHTML(code) + '</code></pre>'
        end
      end
    end
  end
end
POSIX.rb
class Pathname
  # cast to WebResource through the storage-path -> URI mapping
  def R
    R::POSIX.path(to_s.utf8)
  end
end
class WebResource
  module POSIX

    # Tokenize an argument string with shell quoting rules; when that fails
    # (e.g. an unmatched quote) report it and split on non-word characters.
    def self.splitArgs args
      begin
        Shellwords.shellsplit args
      rescue
        puts "shell tokenization failed: #{args}"
        args.split(/\W/)
      end
    end

    # link mapped fs-nodes
    # hard-link this resource's node at the location of n
    def ln n
      source = node.expand_path
      target = n.node.expand_path
      FileUtils.ln source, target
    end

    #TODO relative symlink targets for multiple servers on differing mountpoints
    # symbolic-link this resource's node at the location of n
    def ln_s n
      source = node.expand_path
      target = n.node.expand_path
      FileUtils.ln_s source, target
    end

    # Link this resource at n using the detected LinkMethod, unless n already
    # exists. Failures are reported on stdout rather than raised.
    def link n
      begin
        send(LinkMethod, n) if !n.exist?
      rescue Exception => error
        puts error, error.class, error.message
      end
    end

    # read file at location
    # Read the whole file at this resource's storage path and return it.
    # The block form closes the handle as soon as the read is done instead of
    # leaving it open until garbage collection.
    def readFile; File.open(localPath){|f| f.read} end

    # write file at location
    # Write o to this resource's storage path, creating the containing
    # directory first. Returns self.
    def writeFile o
      dir.mkdir
      File.open(localPath, 'w') do |file|
        file << o
      end
      self
    end

    # touch mapped node
    # touch mapped node, creating the containing directory first
    def touch
      dir.mkdir
      FileUtils.touch(localPath)
    end

    # erase mapped node
    def delete
      node.delete
    end

    # contained children minus hidden nodes
    # contained children minus hidden nodes (names starting with a dot), each cast with #R
    def children
      visible = node.children.reject{|child| child.basename.to_s.start_with? '.'}
      visible.map(&:R)
    end

    # dirname of mapped path
    # resource for the parent directory of the path, nil without a path
    def dir
      path ? dirname.R : nil
    end

    # parent directory of the path as a String, nil without a path
    def dirname
      path ? File.dirname(path) : nil
    end

    # storage-space usage
    # storage-space usage: first column of `du -s` as an Integer
    def du
      usage = `du -s #{sh}| cut -f 1`
      usage.chomp.to_i
    end

    # FIND on path component: case-insensitive match of *p* below this node,
    # capped at 2048 results; an absent or empty p finds nothing
    def find p
      return [] unless p && !p.empty?
      listing = `find #{sh} -ipath #{('*'+p+'*').sh} | head -n 2048`
      listing.lines.map{|line| POSIX.path line.chomp}
    end

    # GLOB on path component
    def glob
      Pathname.glob(localPath).map(&:R)
    end

    # existence check on mapped fs-node
    # does the mapped fs-node exist?
    def exist?
      node.exist?
    end

    # is the mapped fs-node a symbolic link?
    def symlink?
      node.symlink?
    end
    alias_method :e, :exist?

    # create container
    # create container (with parents) unless the node already exists; returns self
    def mkdir
      FileUtils.mkdir_p(localPath) unless exist?
      self
    end

    # size of mapped node, 0 when it cannot be determined
    def size
      begin
        node.size
      rescue StandardError
        0
      end
    end

    # mtime of mapped node
    def mtime
      node.stat.mtime
    end
    alias_method :m, :mtime

    # storage path -> URI
    # storage path -> URI: drop a leading '.', percent-encode spaces and '#',
    # then cast to a resource
    def self.path p
      escaped = p.sub(/^\./,'').gsub(' ','%20').gsub('#','%23')
      escaped.R
    end

    # URI -> storage path
    # URI -> storage path
    # Percent-decodes the path component; an absolute path is made relative to
    # the working directory by prefixing '.'. The result is memoized in @path.
    # URI.unescape no longer exists as of Ruby 3.0, so this calls the default
    # parser's unescape, the routine it used to delegate to.
    def localPath; @path ||= URI::DEFAULT_PARSER.unescape(path[0]=='/' ? '.' + path : path) end

    # Pathname
    # Pathname for the storage path, memoized in @node
    def node
      @node ||= Pathname.new(localPath)
    end

    # shell-escaped path
    # shell-escaped storage path
    def shellPath
      tagged = localPath.utf8
      tagged.sh
    end
    alias_method :sh, :shellPath

    # path components split on /
    # path components split on /, empty list without a path
    def parts
      return [] unless path
      path.split('/')
    end

    # basename of path component ('' path when there is none)
    def basename
      File.basename(path || '')
    end

    # strip document-format suffixes for content-type agnostic base-URI
    def stripDoc
      stripped = uri.sub(/\.(bu|e|html|json|log|md|msg|opml|ttl|txt|u)$/, '')
      R[stripped]
    end

    # name suffix without the dot, '' when there is none
    def ext
      suffix = File.extname(uri)
      suffix[1..-1] || ''
    end

    # TLD of host, '' without a host
    def tld
      (host ? host.split('.')[-1] : nil) || ''
    end

    # SHA2 hash of URI string
    def sha2
      to_s.sha2
    end

    # WebResource -> file(s)
    # WebResource -> file(s)
    # Choose the resources that back the current request. The result of the
    # branch below is normalized with justArray/flatten/compact/uniq and
    # limited to nodes that exist.
    # Side effects: may set @r[:links][:down] and @r[:glob].
    def selectNodes
      (if node.directory?
       if q.has_key?('f') && path!='/' # FIND
         found = find q['f']
         found
       elsif q.has_key?('q') && path!='/' # GREP
         grep q['q']
       else # LS
         if uri[-1] == '/' # inside container
           index = (self+'index.html').glob
           if !index.empty? && qs.empty? # static index
             index
           else
             [self, children]
           end
         else # outside container
           # advertise the slash-terminated URI as the way into the container
           @r[:links][:down] = path + '/' + qs
           self
         end
       end
      else # GLOB
        # a URI containing glob characters is expanded as written; otherwise
        # this node plus any same-named node with a suffix is selected
        @r[:glob] = match /[\*\{\[]/
        [self,(@r[:glob] ? self : (self+'.*')).glob]
       end).justArray.flatten.compact.uniq.select &:exist?
    end

    # pattern -> file(s)
    # pattern -> file(s)
    # Search below this node with grep, case-insensitively.
    # q: query string, tokenized with POSIX.splitArgs
    #  - no terms: no results
    #  - two to four terms: files containing every term, in any order (one grep
    #    stage per term, file names passed NUL-delimited between stages)
    #  - one term, or five and more: a single pattern, the terms joined by '.*'
    # Returns at most 1024 resources.
    def grep q
      args = POSIX.splitArgs q
      return [] if args.empty?
      # every pattern is introduced with -e and followed by --, so a term that
      # starts with '-' is searched for instead of being parsed as a grep option
      cmd = if (2..4).cover? args.size
              rest = args[1..-1]
              filters = rest.each_with_index.map{|term, i|
                # the last stage prints newline-separated names for the reader below
                flags = i == rest.size - 1 ? '-il' : '-ilZ'
                "xargs -0 grep #{flags} -e #{term.sh} --"}
              (["grep -rilZ -e #{args[0].sh} -- #{sh}"] + filters).join ' | '
            else
              pattern = args.join '.*'
              "grep -ril -e #{pattern.sh} -- #{sh}"
            end
      `#{cmd} | head -n 1024`.lines.map{|path| POSIX.path path.chomp}
    end
  end

  include POSIX

  module Webize
    # emit RDF of file-metadata: size, then mtime both as epoch seconds and as
    # an ISO 8601 date, all with the path as subject
    def triplrFile
      subject = path

      size.do do |bytes|
        yield subject, Size, bytes
      end

      mtime.do do |modified|
        yield subject, Mtime, modified.to_i
        yield subject, Date, modified.iso8601
      end
    end

    # RDFize container-metadata: the subject is the path with a trailing
    # slash; emits the Container type, the child count and the mtime triples
    def triplrContainer
      subject = path
      subject = subject + '/' unless subject[-1] == '/'
      yield subject, Type, R[Container]
      yield subject, Size, children.size

      mtime.do do |modified|
        yield subject, Mtime, modified.to_i
        yield subject, Date, modified.iso8601
      end
    end
  end

  module HTTP
    # redirect to date dir
    # redirect to date dir
    # ps: path components; the first letter of the first one picks the
    #     resolution (y, m, d or h) and the rest is appended to the target
    # Returns a 303 response pointing at the current time's directory.
    def chronoDir ps
      format = case ps[0][0].downcase
               when 'y' then '%Y'
               when 'm' then '%Y/%m'
               when 'd' then '%Y/%m/%d'
               when 'h' then '%Y/%m/%d/%H'
               end
      loc = Time.now.strftime format
      location = '/' + loc + '/' + ps[1..-1].join('/') + qs
      [303, @r[:Response].update({'Location' => location}), []]
    end

    # Conditional response.
    # env: request environment; env[:Response] holds the response headers
    #      (including the ETag), env[:Status] an optional status override
    # body: optional lambda producing the body; without it the body is self
    # Returns 304 when one of the If-None-Match tags equals the ETag. A
    # WebResource body is handed to Rack's file handler, anything else is
    # returned directly.
    def entity env, body = nil
      etags = env['HTTP_IF_NONE_MATCH'].do{|header|
        header.strip.split /\s*,\s*/ }
      return [304, {}, []] if etags && (etags.include? env[:Response]['ETag'])

      body = body ? body.call : self
      unless body.class == WebResource
        return [(env[:Status]||200), env[:Response], [body]]
      end

      # use Rack file-handler
      served = (Rack::File.new nil).serving((Rack::Request.new env),body.localPath)
      served.do{|status, headers, stream|
        [status, headers.update(env[:Response]), stream]}
    end

    # Response for a static file: sets Content-Type (with a UTF-8 charset for
    # HTML and Turtle), an ETag from mtime and size, and a permissive CORS
    # header; media types additionally get Cache-Control: no-transform.
    # Serves a preview when requested for a supported extension, otherwise the
    # conditional entity.
    def fileResponse
      textual = %w{text/html text/turtle}.member?(mime)
      headers = {'Content-Type' => textual ? (mime+'; charset=utf-8') : mime,
                 'ETag' => [m,size].join.sha2,
                 'Access-Control-Allow-Origin' => '*'
                }
      @r[:Response].update(headers)
      @r[:Response].update({'Cache-Control' => 'no-transform'}) if mime.match /^(audio|image|video)/
      return filePreview if q.has_key?('preview') && ext.match(/(mp4|mkv|png|jpg)/i)
      entity @r
    end

  end

  module HTML

    # graph to tree transform
    # filesystem-paths control the tree structure
    # graph to tree transform
    # filesystem-paths control the tree structure
    # Returns a root container Hash; each path component of a resource becomes
    # a nested container under Contains, and the resource's fields are merged
    # into the container reached by its full path.
    Contain['tree'] = -> graph {

      tree = {'uri' => '/',
              Type => [R[Container]],
              Contains => []
             }

      graph.values.map{|s|
        this = tree # cursor: container for the path walked so far
        path = []
        s.R.path.do{|p|
          p.R.parts.map{|name|
            path.push name
            # descend into the child with this name, creating it when missing
            this = this[Contains].find{|c|c.R.basename==name} ||
                   (child = {'uri' => path.join('/'), Type => [R[Container]], Contains => []}
                    this[Contains].push child
                    child)}}

        # merge the resource's fields into its container; values are collected in arrays
        s.map{|p,o|
          unless p=='uri'
            this[p] ||= []
            if this[p].class == Array
              this[p].push o
            else
              # existing non-array value: reported on stdout, the new value is dropped
              puts this[p].class, this[p]
            end
          end}}

      tree}

    # Container markup: a name row linking to the container, then a row
    # holding the key/value table of its remaining fields.
    # Removes the Type and 'uri' entries from the container Hash passed in.
    Markup[Container] = -> container , env {
      resource = container.R # cast before the 'uri' field is removed below
      container.delete Type
      container.delete 'uri'
      name = {_: :tr, class: :name,
              c: [{_: :td, class: :label, c: {_: :a, href: resource.uri, c: CGI.escapeHTML(resource.basename)}},
                  {_: :td, class: :spacer}
                 ]}
      contents = {_: :tr, class: :contents, c: {_: :td, colspan: 2, c: HTML.kv(container,env)}}
      {_: :table, class: :container, c: [name, contents]}}
  end

  module POSIX
    # link-method capability test, evaluated once at load time:
    # try to hard-link a probe file under .cache; :ln when that works,
    # :ln_s when any exception is raised along the way
    LinkMethod = begin
                   probe = '.cache/link'.R
                   target = '.cache/link_'.R
                   probe.touch unless probe.exist?
                   target.delete if target.exist?
                   probe.ln target
                   :ln
                 rescue Exception
                   :ln_s
                 end
  end
end
Mail.rb
class WebResource
  module Webize

    # Triplr for mbox archives. Not implemented yet: the body is empty, so no triples are emitted.
    def triplrMbox &b
      # TODO: use formail or a Ruby library? The former forces stdin support and entails process-switch overhead, but is probably more robust anyway
    end

    # Emit RDF triples for the mail message stored at this resource and index
    # it on the filesystem.
    #
    # The message is parsed with Mail.read. Its URI is built from the
    # Message-ID: /msg/<first three hash characters as directories>/<sanitized id>/#this.
    # Along the way the method:
    #  - links the message file to its canonical location and to per-address index paths
    #  - stores HTML parts and attachments as files in the message container
    #  - yields identifier, type, content, sender, recipient, subject, date and reply triples
    #  - recurses into message/rfc822 parts
    #
    # Yields subject, predicate, object for each triple.
    # Returns nil immediately when Mail.read produces no message.
    def triplrMail &b
      m = Mail.read node; return unless m
      id = m.message_id || m.resent_message_id || rand.to_s.sha2 # Message-ID (random stand-in when absent)
      puts " MID #{id}" if @verbose
      msgURI = -> id { h=id.sha2; ['', 'msg', h[0], h[1], h[2], id.gsub(/[^a-zA-Z0-9]+/,'.')[0..96], '#this'].join('/').R}
      resource = msgURI[id]; e = resource.uri                # Message URI
      puts " URI #{resource}" if @verbose
      srcDir = resource.path.R; srcDir.mkdir # container
      srcFile = srcDir + 'this.msg'          # pathname
      unless srcFile.e
        link srcFile # link canonical-location
        puts "LINK #{srcFile}" if @verbose
      end
      yield e, DC+'identifier', id    # Message-ID as RDF
      yield e, DC+'cache', resource
      yield e, Type, R[SIOC+'MailMessage'] # RDF type

      # HTML body
      htmlFiles, parts = m.all_parts.push(m).partition{|p|p.mime_type=='text/html'}
      htmlCount = 0
      htmlFiles.map{|p| # HTML file
        html = srcDir + "#{htmlCount}.html"  # file location
        yield e, DC+'hasFormat', html        # file pointer
        unless html.e
          html.writeFile p.decoded  # store HTML email
          puts "HTML #{html}" if @verbose
        end
        htmlCount += 1 } # increment count

      # text/plain body
      parts.select{|p|
        (!p.mime_type || p.mime_type == 'text/plain') && # text parts
          Mail::Encodings.defined?(p.body.encoding)      # decodable?
      }.map{|p|
        yield e, Content,
              HTML.render({_: :pre,
                           c: p.decoded.to_utf8.lines.to_a.map{|l| # split lines
                             l = l.chomp # strip any remaining [\n\r]
                             if qp = l.match(/^((\s*[>|]\s*)+)(.*)/) # quoted line
                               depth = (qp[1].scan /[>|]/).size # > count
                               if qp[3].empty? # drop blank quotes
                                 nil
                               else # wrap quotes in <span>
                                 indent = "<span name='quote#{depth}'>&gt;</span>"
                                 {_: :span, class: :quote,
                                  c: [indent * depth,' ',
                                      {_: :span, class: :quoted, c: qp[3].gsub('@','').hrefs{|p,o|yield e, p, o}}]}
                               end
                             else # fresh line
                               [l.gsub(/(\w+)@(\w+)/,'\2\1').hrefs{|p,o|yield e, p, o}]
                             end}.compact.intersperse("\n")})} # join lines

      # recursive messages, digests, forwards, archives..
      parts.select{|p|p.mime_type=='message/rfc822'}.map{|m|
        content = m.body.decoded                   # decode message-part
        f = srcDir + content.sha2 + '.inlined.msg' # message location
        f.writeFile content if !f.e                # store message
        f.triplrMail &b} # triplr on contained message

      # From
      from = []
      m.from.do{|f|
        f.justArray.compact.map{|f|
          noms = f.split ' '
          if noms.size > 2 && noms[1] == 'at' # "user at host" obfuscation
            f = "#{noms[0]}@#{noms[2]}"
          end
          puts "FROM #{f}" if @verbose
          from.push f.to_utf8.downcase}} # queue address for indexing + triple-emitting
      m[:from].do{|fr|
        fr.addrs.map{|a|
          name = a.display_name || a.name # human-readable name
          yield e, Creator, name
          puts "NAME #{name}" if @verbose
        } if fr.respond_to? :addrs}
      m['X-Mailer'].do{|m|
        yield e, SIOC+'user_agent', m.to_s
        puts " MLR #{m}" if @verbose
      }

      # To
      to = []
      %w{to cc bcc resent_to}.map{|p|      # recipient fields
        m.send(p).justArray.map{|r|        # recipient
          puts "  TO #{r}" if @verbose
          to.push r.to_utf8.downcase }}    # queue for indexing
      m['X-BeenThere'].justArray.map{|r|to.push r.to_s} # anti-loop recipient
      m['List-Id'].do{|name|yield e, To, name.decoded.sub(/<[^>]+>/,'').gsub(/[<>&]/,'')} # mailinglist name

      # Subject
      subject = nil
      m.subject.do{|s|
        # bracketed [tags] are emitted as labels and removed from the title
        subject = s.to_utf8.gsub(/\[[^\]]+\]/){|l| yield e, Label, l[1..-2] ; nil }
        yield e, Title, subject}

      # Date
      date = m.date || Time.now rescue Time.now # fall back to now when the header is missing or unparseable
      date = date.to_time.utc
      dstr = date.iso8601
      yield e, Date, dstr
      dpath = '/' + dstr[0..6].gsub('-','/') + '/msg/' # month
      puts "DATE #{date}\nSUBJ #{subject}" if @verbose && subject

      # index addresses
      [*from,*to].map{|addr|
        user, domain = addr.split '@'
        if user && domain
          apath = dpath + domain + '/' + user # address
          yield e, (from.member? addr) ? Creator : To, R[apath+'?head'] # To/From triple
          if subject
            slug = subject.scan(/[\w]+/).map(&:downcase).uniq.join('.')[0..63]
            mpath = apath + '.' + dstr[8..-1].gsub(/[^0-9]+/,'.') + slug # time & subject
            mpath = mpath + (mpath[-1] == '.' ? '' : '.')  + 'msg' # file-type extension
            mdir = '../.mail/' + domain + '/' # maildir
            %w{cur new tmp}.map{|c| R[mdir + c].mkdir} # maildir container
            mloc = R[mdir + 'cur/' + id.sha2 + '.msg'] # maildir entry
            iloc = mpath.R # index entry
            [iloc,mloc].map{|loc| loc.dir.mkdir # container
              unless loc.e
                link loc
                puts "LINK #{loc}" if @verbose
              end
            }
          end
        end
      }

      # index bidirectional refs
      %w{in_reply_to references}.map{|ref|
        m.send(ref).do{|rs|
          rs.justArray.map{|r|
            dest = msgURI[r]
            yield e, SIOC+'reply_of', dest
            destDir = dest.path.R; destDir.mkdir; destFile = destDir+'this.msg'
            # bidirectional reference link
            rev = destDir + id.sha2 + '.msg'
            rel = srcDir + r.sha2 + '.msg'
            if !rel.e # link missing
              if destFile.e # link
                destFile.link rel
              else # symlink. it may appear
                destFile.ln_s rel unless rel.symlink?
              end
            end
            srcFile.link rev if !rev.e}}}

      # attachments
      m.attachments.select{|p|Mail::Encodings.defined?(p.body.encoding)}.map{|p| # decodability check
        # Explicit name: the sender controls it, so keep only the final path
        # segment and refuse names that would resolve outside the container.
        explicit = p.filename.do{|f|f.to_utf8.do{|f|
                                   base = File.basename f
                                   !['', '.', '..', '/'].member?(base) && base}}
        # Generated name: derived from the content rather than rand, so that
        # re-indexing the same message finds the stored file instead of adding a copy.
        name = explicit ||
               (p.body.decoded.sha2 + (Rack::Mime::MIME_TYPES.invert[p.mime_type] || '.bin').to_s)
        file = srcDir + name                     # file location
        unless file.e
          file.writeFile p.body.decoded # store
          puts "FILE #{file}" if @verbose
        end
        yield e, SIOC+'attachment', file         # file pointer
        if p.main_type=='image'                  # image attachments
          yield e, Image, file                   # image link represented in RDF
          yield e, Content,                      # image link represented in HTML
                HTML.render({_: :a, href: file.uri,
                             c: [{_: :img, src: file.uri},
                                 CGI.escapeHTML(p.filename.to_s)]}) # filename is untrusted text: escape before rendering
        end }
    end
    # Run triplrMail for its indexing side effects, counting the emitted
    # triples and printing the total.
    # Ordinary errors are reported (URI, error class, message) and swallowed so
    # a batch run can move on to the next message. Interrupts and exit requests
    # are not rescued and therefore propagate to the caller.
    def indexMail
      triples = 0
      triplrMail{|_s,_p,_o| triples += 1}
      puts "    #{triples} triples"
    rescue StandardError => e
      puts uri, e.class, e.message
    end

    # Index every message in this resource's glob expansion, returning the per-message results
    def indexMails
      glob.map{|message| message.indexMail}
    end
  end
end
Image.rb
class WebResource

  module URIs
    # base URI that Webize#ig prefixes to each account name it reads
    Instagram = 'https://www.instagram.com/'
    # YouTube URI prefix; not referenced in this section — presumably the XML namespace of YouTube feeds, TODO confirm
    YouTube = 'http://www.youtube.com/xml/schemas/2015#'
  end

  # TODO Dedupe video embeds within request
  module Webize

    #TODO imagehost reqtime translation to RDF
    # Triplr for image files: types the resource as an Image, points the Image
    # property at the resource itself, emits pixel width and height as read by
    # Dimensions from the local file, then continues with the generic file triples.
    def triplrImage &f
      subject = uri
      yield subject, Type, R[Image]
      yield subject, Image, self
      width, height = Dimensions.dimensions localPath
      {'width' => width, 'height' => height}.each{|property, pixels|
        yield subject, Stat + property, pixels}
      triplrFile &f
    end

    # Index every Instagram account listed in this file, one account name per line.
    # The list is read with File.readlines, which closes the file when done and
    # never treats the path as a command. Blank lines are skipped.
    # Returns the Array of indexInstagram results.
    def ig
      File.readlines(localPath).map(&:chomp).reject(&:empty?).map{|account|
        R[Instagram + account].indexInstagram}
    end

  end
  module HTML

    # Render an image as a linked thumbnail. Values without a URI are rendered
    # as escaped text. Each image URI is rendered once per env: a repeat
    # occurrence yields nil.
    Markup[Image] = -> image, env {
      next CGI.escapeHTML(image.to_s) unless image.respond_to? :uri

      picture = image.R
      next if env[:images][picture.uri] # already shown in this environment
      env[:images][picture.uri] = true

      # hostless (local) images use the ?preview thumbnail, remote ones their own URI
      source = picture.host ? picture.uri : picture.path + '?preview'
      {class: :thumb,
       c: {_: :a, href: picture.uri,
           c: {_: :img, src: source}}}}

    # Render a video: an embedded YouTube iframe when the URI matches /youtu/,
    # otherwise a <video> element captioned with the basename. Each URI is
    # rendered once per env (tracked in env[:images]); repeats yield nil.
    Markup[Video] = -> video, env {
      clip = video.R
      unless env[:images][clip.uri]
        env[:images][clip.uri] = true
        if clip.match(/youtu/)
          id = clip.q(false)['v'] || clip.parts[-1] # ?v= parameter, else last path segment
          {_: :iframe, width: 560, height: 315, src: "https://www.youtube.com/embed/#{id}", frameborder: 0, gesture: "media", allow: "encrypted-media", allowfullscreen: :true}
        else
          player  = {_: :video, src: clip.uri, controls: :true}
          caption = {_: :span, class: :notes, c: clip.basename}
          {class: :video, c: [player, '<br>', caption]}
        end
      end}

  end
end
R
#!/usr/bin/env ruby
# Command-line entry point: cast the first argument to a WebResource and send
# it the method named by the second argument, passing any remaining arguments.
#   usage: R <uri-or-path> <method> [args...]
require 'ww'
# Without both a resource and a method name there is nothing to dispatch.
abort 'usage: R <uri-or-path> <method> [args...]' if ARGV.size < 2
ARGV[0].R.send *ARGV[1..-1]
Chat.rb
# coding: utf-8
class WebResource

  module URIs
    # Twitter site root, without trailing slash; search, permalink and author URIs are built by appending to it
    Twitter = 'https://twitter.com'
  end
  module HTML
    # Render an instant message on one line: colorized author link(s), then
    # abstract, content, and any image or video attachments.
    Markup[InstantMessage] = -> msg, env {
      authors = msg[Creator].map{|creator|
        if creator.respond_to? :uri
          nick = creator.R.fragment || creator.R.basename || ''
          tint = env[:colors][nick] ||= (HTML.colorize nick) # one color per name, cached in env
          {_: :a, class: :comment, style: tint, href: msg.uri, c: nick}
        else
          CGI.escapeHTML creator
        end}
      abstract = msg[Abstract]
      content  = msg[Content]
      images   = msg[Image].map{|image| Markup[Image][image, env]}
      videos   = msg[Video].map{|video| Markup[Video][video, env]}

      [{c: [authors, ' ', abstract, content, images, videos]}, " \n"]}
  end
  module Webize

    # Index recent tweets for the accounts listed in this file, one username per line.
    # Usernames are shuffled, grouped into batches of 16 and each batch is
    # fetched through a single "from:a OR from:b ..." search URI.
    # The list is read with File.readlines, which closes the file when done and
    # never treats the path as a command. Blank lines are skipped so they cannot
    # produce empty "from:" terms.
    def twitter
      File.readlines(localPath).map(&:chomp).reject(&:empty?).shuffle.each_slice(16){|accounts|
        query = accounts.map{|account| 'from:' + account.chomp}.intersperse('+OR+').join
        readURI = Twitter + '/search?f=tweets&vertical=default&q=' + query
        readURI.R.indexTweets}
    end

    # Scrape tweets from this resource's parsed HTML and yield them as triples:
    # type, date, creator, one DC link per anchor in the tweet text, and the
    # cleaned-up tweet markup as abstract. Subject is the tweet's permalink.
    def fetchTweets
      nokogiri.css('div.tweet > div.content').map{|tweet|
        subject = Twitter + tweet.css('.js-permalink').attr('href')
        handle  = tweet.css('.username b')[0].inner_text
        creator = R[Twitter + '/' + handle]
        posted  = Time.at(tweet.css('[data-time]')[0].attr('data-time').to_i).iso8601

        yield subject, Type, R[SIOC+'InstantMessage']
        yield subject, Date, posted
        yield subject, Creator, creator

        body = tweet.css('.tweet-text')[0]
        body.css('a').map{|anchor|
          anchor.set_attribute('id', 'tweetedlink'+rand.to_s.sha2)
          target = anchor.attr 'href'
          anchor.set_attribute('href', Twitter + target) if target.match(/^\//) # absolutize site-relative links
          yield subject, DC+'link', R[anchor.attr 'href']}

        summary = HTML.strip(body.inner_html).gsub(/<\/?span[^>]*>/,'').gsub(/\n/,'').gsub(/\s+/,' ')
        yield subject, Abstract, summary}
    end

    # Fetch tweets, group their triples by tweet URI, and store each dated
    # tweet as a JSON document at a path built from its timestamp and a slug of
    # its URI. Tweets whose document already exists are left untouched.
    def indexTweets
      tweets = {}

      # build graph
      fetchTweets{|subject, predicate, object|
        resource = (tweets[subject] ||= {'uri'=>subject})
        (resource[predicate] ||= []).push object}

      # serialize tweets to file(s)
      tweets.map{|tweetURI, resource|
        resource[Date].do{|dates|
          slug = tweetURI.sub(/https?/,'.').gsub(/\W/,'.').gsub(/\.+/,'.')
          time = dates[0].to_s.gsub(/[-T]/,'/').sub(':','/').sub(/(.00.00|Z)$/, '')
          doc = "/#{time}#{slug}.e".R
          unless doc.e
            puts tweetURI
            doc.writeFile({tweetURI => resource}.to_json)
          end}}
    end

    # Triplr for chat logs. Every line matching "HHMMSS nick text" becomes an
    # InstantMessage at <log>#l<index> (index starting at 0) with creator,
    # content, any triples produced by link extraction, and a date when the
    # containing directory path carries a YYYY/MM/DD day.
    # If at least one message was found, a ChatLog summary is emitted at the
    # log URI: type, modification time, network, channel title, and Size set to
    # the number of messages.
    def triplrChatLog &f
      count = 0 # messages emitted so far; also the index of the next message
      base = stripDoc
      dir = base.dir
      log = base.uri
      basename = base.basename
      network = dir + '/' + basename.split('%23')[0] + '*' # part of the name before the escaped '#'
      day = dir.uri.match(/\/(\d{4}\/\d{2}\/\d{2})/).do{|d|d[1].gsub('/','-')}
      readFile.lines.map{|l|
        l.scan(/(\d\d)(\d\d)(\d\d)[\s+@]*([^\(\s]+)[\S]* (.*)/){|m|
          s = base + '#l' + count.to_s
          count += 1
          yield s, Type, R[SIOC+'InstantMessage']
          yield s, Creator, R['#'+m[3]]
          # per-message channel triple is disabled: yield s, To, dir + '/' + basename
          yield s, Content, m[4].hrefs{|p,o|
            yield s, p, o }
          yield s, Date, day+'T'+m[0]+':'+m[1]+':'+m[2] if day}}
      if count > 0 # summarize at log-URI, including for single-message logs
        yield log, Type, R[SIOC+'ChatLog']
        yield log, Date, mtime.iso8601
        yield log, To, network
        yield log, Title, basename.split('%23')[-1] # channel
        yield log, Size, count
      end
    end
  end
end
Gemfile
source "https://rubygems.org/"
gem 'dimensions'
gem 'foreman'
gem 'icalendar'
gem 'linkeddata'
gem 'mail'
gem 'nokogiri'
gem 'nokogiri-diff'
gem 'pry'
gem 'pry-doc'
gem 'rack'
gem 'redcarpet'
gem 'unicorn'
ww.rb
%w{cgi csv date digest/sha2 dimensions fileutils icalendar json linkeddata mail nokogiri open-uri pathname rack rdf redcarpet resolv-replace shellwords}.map{|r|require r}
%w{URI MIME HTTP HTML POSIX Feed JSON Text Mail Calendar Chat Icons Image AdHoc}.map{|i|require_relative i}
R = WebResource
JSON.rb
class Hash

  # Cast to WebResource: build a resource from this Hash's 'uri' field and
  # attach the Hash as its data, so the cast keeps every field.
  def R
    resource = WebResource.new uri
    resource.data(self)
  end

  # The value stored under the 'uri' key (nil when absent)
  def uri
    self['uri']
  end

end
class WebResource
  module JSON
    include URIs
    # Values stored for predicate p in this resource's data, coerced with justArray
    def [](p)
      (@data || {})[p].justArray
    end

    # Merge Hash d into this resource's data; returns self so calls can be chained
    def data(d)
      @data = (@data || {}).merge(d)
      self
    end

    # URIs of this resource's types (Type values that respond to #uri); memoized
    def types
      @types ||= self[Type].select{|t| t.respond_to? :uri}.map{|t| t.uri}
    end

    # Is the given type URI among this resource's types?
    def a(type)
      types.member? type
    end

    # Serialize as a bare reference: a JSON object holding only the URI
    def to_json(*a)
      {'uri' => uri}.to_json(*a)
    end

    # RDF::Format declaration for the native JSON serialization:
    # content type application/json+rdf, file extension .e, UTF-8 encoded,
    # read by WebResource::JSON::Reader.
    class Format < RDF::Format
      content_type     'application/json+rdf', :extension => :e
      content_encoding 'utf-8'
      reader { WebResource::JSON::Reader } # block form: Reader is defined further down and resolved when the block runs
    end
    # native JSON format is RDF
    # RDF::Reader over the native JSON tree: {subject => {predicate => object(s)}}.
    # Subjects and Hash objects (their 'uri' field) are resolved against the
    # :base_uri option; all other objects become literals.
    class Reader < RDF::Reader
      format Format

      # input: an IO-like object (anything responding to #read) or a value
      # whose #to_s is the JSON text. options[:base_uri] is the resolution base.
      # An optional block is instance_eval'd (arity 0) or called with the reader.
      def initialize(input = $stdin, options = {}, &block)
        source = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
        @graph = ::JSON.parse source.read
        @base = options[:base_uri]
        if block_given?
          if block.arity == 0
            instance_eval(&block)
          else
            block.call(self)
          end
        end
        nil
      end

      # Call fn with one RDF::Statement per (subject, predicate, object) in the
      # tree. The 'uri' field of each resource is skipped.
      def each_statement &fn
        @graph.map{|subject, resource|
          resource.map{|predicate, objects|
            next if predicate == 'uri'
            objects.justArray.map{|object|
              s = @base.join subject
              p = RDF::URI(predicate)
              o = if object.class == Hash
                    @base.join object['uri']
                  else
                    literal = RDF::Literal object
                    # SIOC content carries markup, so it is typed as an XML literal
                    literal.datatype = RDF.XMLLiteral if predicate == 'http://rdfs.org/sioc/ns#content'
                    literal
                  end
              fn.call RDF::Statement.new(s, p, o)}}}
      end

      # Call block with subject, predicate, object for every statement
      def each_triple &block
        each_statement{|statement| block.call *statement.to_triple}
      end
    end
  end

  include JSON

  module HTTP
    # load JSON and RDF to URI-indexed Hashtable. HTML and Feed renderer take this as input
    # Load a set of files into one URI-indexed Hash:
    #   {subject => {'uri' => subject, predicate => [objects]}}
    # Files answering true to isRDF are loaded through an RDF::Graph; the rest
    # are transcoded and parsed as native JSON. An object is stored at most
    # once per subject and predicate.
    def load set # file-set argument
      g = {}                 # JSON tree
      graph = RDF::Graph.new # RDF graph

      # shared insert step: create the subject and predicate slots on demand, skip duplicates
      insert = -> subject, predicate, object {
        resource = (g[subject] ||= {'uri'=>subject})
        values = (resource[predicate] ||= [])
        values.push object unless values.member? object}

      rdf, json = set.partition &:isRDF

      # load RDF data
      rdf.map{|doc|
        graph.load doc.localPath, :base_uri => doc}
      graph.each_triple{|s,p,o|
        # resources are cast to WebResource, literals reduced to their value
        object = [RDF::Node, RDF::URI, WebResource].member?(o.class) ? o.R : o.value
        insert[s.to_s, p.to_s, object]}

      # load JSON data
      json.map{|doc|
        doc.transcode.do{|transcoded|
          ::JSON.parse(transcoded.readFile).map{|subject, resource|
            resource.map{|predicate, objects|
              next if predicate == 'uri'
              objects.justArray.map{|object|
                insert[subject, predicate, object.class == Hash ? object.R : object]}}}}}

      g # loaded graph reference returned to caller
    end
  end
end
19
1524278346
2018-04-21T02:39:06+00:00