MIME.rb
# coding: utf-8
=begin MIME-type handling
 The JSON-cache is an RDF subset stored as a tree, trivially serializable with the stdlib JSON functions.
 Call #toRDF to make a JSON-cache entry for a non-RDF file; the RDF::Reader defined below lets it be read back as RDF.
=end
class R

  # filename prefix (text before the first '.') -> MIME type
  # the suffix is optional, so whole names such as "LICENSE" match too; #mime downcases before lookup
  MIMEprefix = {
    'authors'      => 'text/plain',
    'changelog'    => 'text/plain',
    'config'       => 'application/config',
    'contributors' => 'text/plain',
    'copying'      => 'text/plain',
    'dockerfile'   => 'text/plain',
    'gemfile'      => 'application/ruby',
    'install'      => 'text/plain',
    'license'      => 'text/plain',
    'links'        => 'text/plain',
    'makefile'     => 'application/makefile',
    'msg'          => 'message/rfc822', # Procmail msg.* from delivering to non-maildir containers
    'rakefile'     => 'application/ruby',
    'readme'       => 'text/markdown',
    'todo'         => 'text/plain',
    'unlicense'    => 'text/plain',
  }

  # filename suffix (extension without the dot) -> MIME type
  MIMEsuffix = {
    'asc'      => 'text/plain',
    'chk'      => 'text/plain',
    'conf'     => 'application/config',
    'desktop'  => 'application/config',
    'doc'      => 'application/msword',
    'docx'     => 'application/msword+xml',
    'dat'      => 'application/octet-stream',
    'db'       => 'application/octet-stream',
    'e'        => 'application/json',
    'eot'      => 'application/font',
    'go'       => 'application/go',
    'haml'     => 'text/plain',
    'hs'       => 'application/haskell',
    'ini'      => 'text/plain',
    'ino'      => 'application/ino',
    'md'       => 'text/markdown',
    'msg'      => 'message/rfc822',
    'list'     => 'text/plain',
    'log'      => 'text/chatlog',
    'ru'       => 'text/plain',
    'rb'       => 'application/ruby',
    'rst'      => 'text/restructured',
    'sample'   => 'application/config',
    'terminfo' => 'application/config',
    'tmp'      => 'application/octet-stream',
    'ttl'      => 'text/turtle',
    'u'        => 'text/uri-list',
    'woff'     => 'application/font',
    'yaml'     => 'text/plain',
  }

  # MIME type -> [triplr method name, *extra arguments]
  # #toJSON splats the array into `send`, so trailing elements become arguments of the triplr method
  Triplr = {
    'application/config' => [:triplrDataFile],
    'application/font' => [:triplrFile],
    'application/go' => [:triplrSourceCode],
    'application/haskell' => [:triplrSourceCode],
    'application/javascript' => [:triplrSourceCode],
    'application/ino' => [:triplrSourceCode],
    'application/json' => [:triplrDataFile],
    'application/octet-stream' => [:triplrFile],
    'application/org' => [:triplrOrg],
    'application/pdf' => [:triplrFile],
    'application/makefile' => [:triplrSourceCode],
    'application/msword' => [:triplrWordDoc],
    'application/msword+xml' => [:triplrWordXML],
    'application/pkcs7-signature' => [:triplrFile],
    'application/rtf' => [:triplrRTF],
    'application/ruby' => [:triplrSourceCode],
    'application/sh' => [:triplrSourceCode],
    'application/x-sh' => [:triplrSourceCode],
    'application/xml' => [:triplrDataFile],
    'application/x-executable' => [:triplrFile],
    'application/x-gzip' => [:triplrArchive],
    'application/vnd.oasis.opendocument.text' => [:triplrOpenDocument],
    'audio/mpeg' => [:triplrAudio],
    'audio/x-wav' => [:triplrAudio],
    'audio/3gpp' => [:triplrAudio],
    'image/bmp' => [:triplrImage],
    'image/gif' => [:triplrImage],
    'image/png' => [:triplrImage],
    'image/svg+xml' => [:triplrImage],
    'image/tiff' => [:triplrImage],
    'image/jpeg' => [:triplrImage],
    'inode/directory' => [:triplrContainer],
    'message/rfc822' => [:triplrMail],
    'text/cache-manifest' => [:triplrText],
    'text/chatlog' => [:triplrChatLog],
    'text/css' => [:triplrSourceCode],
    'text/csv' => [:triplrCSV, /,/],
    'text/html' => [:triplrHTML],
    'text/man' => [:triplrMan],
    'text/x-c' => [:triplrSourceCode],
    'text/x-ruby' => [:triplrSourceCode],
    'text/x-php' => [:triplrSourceCode],
    'text/x-python' => [:triplrSourceCode],
    'text/x-script.ruby' => [:triplrSourceCode],
    'text/x-script.python' => [:triplrSourceCode],
    'text/x-shellscript' => [:triplrSourceCode],
    'text/markdown' => [:triplrMarkdown],
    'text/nfo' => [:triplrText, 'cp437'],
    'text/plain' => [:triplrText],
    'text/restructured' => [:triplrMarkdown],
    'text/rtf' => [:triplrRTF],
    'text/semicolon-separated-values' => [:triplrCSV, /;/],
    'text/tab-separated-values' => [:triplrCSV, /\t/],
    'text/uri-list' => [:triplrUriList],
    'text/x-tex' => [:triplrTeX],
  }

  # MIME types with native output support
  Writable = ['application/atom+xml', 'text/html']

  # RDF type or property URI -> icon name
  Icons = {
    'uri'                            => :id,
    Type                             => :type,
    Container                        => :dir,
    Content                          => :pencil,
    Date                             => :date,
    Label                            => :tag,
    Title                            => :title,
    Sound                            => :speaker,
    Image                            => :img,
    Size                             => :size,
    Mtime                            => :time,
    To                               => :userB,
    DC + 'hasFormat'                 => :file,
    DC + 'cache'                     => :chain,
    Schema + 'location'              => :location,
    Stat + 'File'                    => :file,
    Stat + 'Archive'                 => :archive,
    Stat + 'HTMLFile'                => :html,
    Stat + 'WordDocument'            => :word,
    Stat + 'DataFile'                => :tree,
    Stat + 'TextFile'                => :textfile,
    Stat + 'container'               => :dir,
    Stat + 'contains'                => :dir,
    SIOC + 'BlogPost'                => :pencil,
    SIOC + 'ChatLog'                 => :comments,
    SIOC + 'Discussion'              => :comments,
    SIOC + 'InstantMessage'          => :comment,
    SIOC + 'MicroblogPost'           => :newspaper,
    SIOC + 'WikiArticle'             => :pencil,
    SIOC + 'Tweet'                   => :tweet,
    SIOC + 'Usergroup'               => :group,
    SIOC + 'SourceCode'              => :code,
    SIOC + 'has_creator'             => :user,
    SIOC + 'user_agent'              => :mailer,
    SIOC + 'has_discussion'          => :comments,
    SIOC + 'Thread'                  => :openenvelope,
    SIOC + 'Post'                    => :newspaper,
    SIOC + 'MailMessage'             => :envelope,
    W3 + '2000/01/rdf-schema#Resource' => :node,
  }

  # file -> MIME type
  # Resolution order: directory check, filename-prefix table, filename-suffix table,
  # Rack's suffix table, and as a last resort content sniffing with file(1).
  # The answer is memoized in @mime.
  def mime
    return @mime if @mime
    name = path || ''
    prefix = (File.basename(name).split('.')[0] || '').downcase
    suffix = (File.extname(name)[1..-1] || '').downcase
    @mime =
      if node.directory? # container
        'inode/directory'
      else
        MIMEprefix[prefix] ||                      # prefix mapping
          MIMEsuffix[suffix] ||                    # suffix mapping
          Rack::Mime::MIME_TYPES['.'+suffix] ||    # suffix mapping (Rack)
          begin                                    # nothing mapped: sniff inside the file. suggest adding explicit mapping
            puts "#{pathPOSIX} unmapped MIME, sniffing content (SLOW)"
            `file --mime-type -b #{Shellwords.escape pathPOSIX.to_s}`.chomp
          end
      end
  end

  # true when the extension is one of atom, n3, rdf, owl or ttl
  def isRDF
    %w{atom n3 rdf owl ttl}.include?(ext)
  end

  # Return self when #isRDF holds, otherwise the JSON-cache document produced by #toJSON
  def toRDF
    if isRDF
      self
    else
      toJSON
    end
  end

  # Return the JSON-cache document (readable as RDF) for this resource.
  # A resource whose extension is 'e' is returned as is. Otherwise the cache
  # location is derived from a hash of the inode number, and the cache is (re)written
  # from the MIME type's triplr unless `cache.e && cache.m > m` already holds
  # (presumably "cache exists and is newer than the source" - confirm against R#e / R#m).
  def toJSON
    return self if ext == 'e'
    digest = node.stat.ino.to_s.sha2
    cache = R['/.cache/'+digest[0..2]+'/'+digest[3..-1]+'.e'].setEnv @r # cache location
    return cache if cache.e && cache.m > m # cache is current

    graph = {}
    handler = Triplr[mime]
    unless handler
      puts "WARNING missing #{mime} triplr for #{uri}"
      handler = :triplrFile # generic fallback
    end
    # collect triples into {subject => {'uri' => subject, predicate => [objects]}}
    send(*handler) do |s, p, o|
      resource = (graph[s] ||= {'uri' => s})
      (resource[p] ||= []).push o
    end
    cache.writeFile graph.to_json
    cache
  end

  # Each of these emits one type triple for the resource, then the generic file triples.

  # archive file
  def triplrArchive &f
    yield uri, Type, R[Stat+'Archive']
    triplrFile(&f)
  end

  # audio file
  def triplrAudio &f
    yield uri, Type, R[Sound]
    triplrFile(&f)
  end

  # HTML file
  def triplrHTML &f
    yield uri, Type, R[Stat+'HTMLFile']
    triplrFile(&f)
  end

  # data/config file
  def triplrDataFile &f
    yield uri, Type, R[Stat+'DataFile']
    triplrFile(&f)
  end
  # Source code: type triple, the output of `pygmentize -f html` as content, then the generic file triples
  def triplrSourceCode &f
    yield uri, Type, R[SIOC+'SourceCode']
    highlighted = `pygmentize -f html #{sh}`
    yield uri, Content, highlighted
    triplrFile(&f)
  end
  # TeX: emit the output of `tth -r` on the file as the content of the suffix-stripped document URI
  def triplrTeX
    subject = stripDoc.uri
    yield subject, Content, `cat #{sh} | tth -r`
  end
  # Word-processor formats: each delegates to #triplrWord with the name of a text-extraction command

  # RTF via catdoc
  def triplrRTF &f
    triplrWord(:catdoc, &f)
  end

  # legacy .doc via antiword
  def triplrWordDoc &f
    triplrWord(:antiword, &f)
  end

  # .docx via docx2txt; the trailing '-' is passed as the command's output argument
  def triplrWordXML &f
    triplrWord(:docx2txt, '-', &f)
  end

  # OpenDocument text via odt2txt
  def triplrOpenDocument &f
    triplrWord(:odt2txt, &f)
  end

  # URI list: type every listed URI as an rdfs:Resource
  def triplrUriList
    uris.map do |listed|
      yield listed, Type, R[W3+'2000/01/rdf-schema#Resource']
    end
  end
  # Lines of this file with their line terminators removed.
  # Returns an Array of Strings, one per line.
  # File.readlines closes the file when done (a bare `open(...).readlines` left the handle
  # open until GC) and never treats a path starting with '|' as a command to run.
  def uris
    File.readlines(pathPOSIX).map(&:chomp)
  end

  # Image: type triple, pixel width and height as reported by Dimensions, then the generic file triples
  def triplrImage &f
    yield uri, Type, R[Image]
    width, height = Dimensions.dimensions pathPOSIX
    [['width', width], ['height', height]].each do |property, pixels|
      yield uri, Stat+property, pixels
    end
    triplrFile(&f)
  end

  # Directory: type, mtime (epoch seconds and ISO 8601), one 'contains' triple per child
  # (directories get a trailing slash, files are passed through stripDoc) and the child count as Size.
  def triplrContainer
    container = path
    container += '/' unless container[-1] == '/'
    modified = mtime
    yield container, Type, R[Container]
    yield container, Mtime, modified.to_i
    yield container, Date, modified.iso8601
    subdirs, files = children.partition { |child| child.node.directory? }
    members = subdirs.map { |subdir| subdir + '/' } + files.map(&:stripDoc)
    members.each { |member| yield container, Stat+'contains', member }
    yield container, Size, members.size
  end

  # Generic file triples: Size, then Mtime (epoch seconds) and Date (ISO 8601).
  # Each is wrapped in `.do`, the project's Object extension
  # (presumably it skips the block when the receiver is nil - confirm).
  def triplrFile
    subject = path
    size.do { |bytes| yield subject, Size, bytes }
    mtime.do do |stamp|
      yield subject, Mtime, stamp.to_i
      yield subject, Date, stamp.iso8601
    end
  end

  # Word-processor document -> triples.
  # conv - name of the text-extraction command to run on this file
  # out  - extra trailing argument for that command (default: none)
  # Emits the generic file triples, a WordDocument type triple, and the command's
  # output wrapped in <pre> as content.
  def triplrWord conv, out='', &f
    triplrFile(&f)
    yield uri, Type, R[Stat+'WordDocument']
    yield uri, Content, ['<pre>', `#{conv} #{sh} #{out}`, '</pre>'].join
  end

  # Plain text -> triples on the suffix-stripped document URI.
  # enc - optional source encoding name; when given the content is re-tagged with it and converted via to_utf8
  # Emits a TextFile type triple, the text (passed through #hrefs) inside a pre-wrap <pre>, and the mtime as Date.
  def triplrText enc=nil, &f
    subject = stripDoc.uri
    yield subject, Type, R[Stat+'TextFile']
    text = readFile.do { |raw| enc ? raw.force_encoding(enc).to_utf8 : raw }
    yield subject, Content, H({_: :pre, style: 'white-space: pre-wrap', c: text.hrefs})
    mtime.do { |stamp| yield subject, Date, stamp.iso8601 }
  end

  # Markdown -> triples on the suffix-stripped document URI:
  # a TextFile type triple, the Redcarpet-rendered HTML (fenced code blocks enabled,
  # Pygment renderer) as content, and the mtime as Date.
  def triplrMarkdown
    subject = stripDoc.uri
    yield subject, Type, R[Stat+'TextFile']
    markdown = ::Redcarpet::Markdown.new(::Redcarpet::Render::Pygment, fenced_code_blocks: true)
    yield subject, Content, markdown.render(readFile)
    mtime.do { |stamp| yield subject, Date, stamp.iso8601 }
  end

  # Delimited text (CSV / semicolon- / tab-separated) -> triples.
  # d - column delimiter as supplied by the Triplr table: a single-delimiter Regexp
  #     such as /,/, /;/ or /\t/ (a plain String is accepted too)
  # The first row names the predicate of each column. Emits a Table type triple and
  # rowCount for the file, then for every following row a Row type triple and one
  # triple per field on the subject "<uri>#row:<index>".
  def triplrCSV d
    ns = W3 + 'ns/csv#'
    # CSV needs a literal separator String. Regexp#source of /\t/ is the two characters
    # backslash + "t", so turn that escape back into a real tab.
    separator = d.is_a?(Regexp) ? d.source.gsub('\t', "\t") : d.to_s
    separator = ',' if separator.empty?
    rows = CSV.read pathPOSIX, col_sep: separator
    fields = rows[0] # header-row
    return unless fields
    yield uri, Type, R[ns+'Table']
    yield uri, ns+'rowCount', rows.size
    rows[1..-1].each_with_index{|row,line|
      next if row.empty? # blank line: nothing to describe
      id = uri + '#row:' + line.to_s
      yield id, Type, R[ns+'Row'] # once per row rather than once per field
      row.each_with_index{|field,i|
        next unless fields[i] # field beyond the header row has no predicate
        yield id, fields[i], field}}
  end

  # Chat log -> triples.
  # Every line matching "HHMMSS nick text" becomes an InstantMessage on the subject
  # "<log>#l<index>" (index is zero-based), carrying label, creator, channel, content
  # and - when the containing path holds a /YYYY/MM/DD segment - a timestamp.
  # Triples produced by #hrefs for the message text are emitted on both the message and the log.
  # A log with at least one message is then summarized as a ChatLog whose Size is the message count.
  # Errors are reported on stdout and swallowed; the method then returns nil.
  def triplrChatLog &f
    linenum = -1 # index of the last message seen; -1 means none yet
    base = stripDoc
    dir = base.dir
    log = base.uri
    basename = base.basename
    channel = dir + '/' + basename
    network = dir + '/' + basename.split('%23')[0] + '*'
    day = dir.uri.match(/\/(\d{4}\/\d{2}\/\d{2})/).do{|d|d[1].gsub('/','-')}
    readFile.lines.map{|l|
      l.scan(/(\d\d)(\d\d)(\d\d)[\s+@]*([^\(\s]+)[\S]* (.*)/){|m|
        s = base + '#l' + (linenum += 1).to_s
        yield s, Type, R[SIOC+'InstantMessage']
        yield s, Label, m[3]
        yield s, Creator, R['#'+m[3]]
        yield s, To, channel
        yield s, Content, m[4].hrefs{|p, o|
          yield log, p, o
          yield s, p, o
        }
        yield s, Date, day+'T'+m[0]+':'+m[1]+':'+m[2] if day}}
    # summarize non-empty log
    # linenum is the last zero-based index, so one message leaves it at 0 and the count is linenum + 1
    if linenum >= 0
      yield log, Type, R[SIOC+'ChatLog']
      yield log, Date, mtime.iso8601
      yield log, Creator, channel
      yield log, To, network
      yield log, Title, basename.split('%23')[-1] # channel
      yield log, Size, linenum + 1
    end
  rescue StandardError => e # not Exception: let Interrupt/SystemExit propagate
    puts uri, e.class, e.message
  end

  # email
  # Message-ID -> resource at /msg/<h0>/<h1>/<h2>/<sanitized id>/#this,
  # where h0..h2 are the first three characters of the id's sha2 digest and the sanitized id
  # has every run of non-alphanumerics replaced by '.' and is cut to 97 characters.
  MessageURI = lambda do |id|
    digest = id.sha2
    slug = id.gsub(/[^a-zA-Z0-9]+/,'.')[0..96]
    ['', 'msg', digest[0], digest[1], digest[2], slug, '#this'].join('/').R
  end
  # RFC 822 mail message -> triples, plus on-disk indexing as a side effect.
  # The message is parsed with Mail.read; nothing is emitted if that returns nil.
  # Besides yielding triples about the message (identifier, type, creator, recipients,
  # labels, title, date, reply links, content, attachments) this method also:
  #  - creates the message's container under /msg/ and links this file to "this.msg" there
  #  - links the file into a per-month, per-address index path when a subject is present
  #  - links each referenced message and this one into each other's containers
  #  - stores HTML parts, inlined message parts and attachments as files in the container
  # `ln` / `ln_s` / `mkdir` / `writeFile` / `e` are project helpers not visible here
  # (presumably link, symlink, make directory, write file, exists? - confirm).
  # Every link operation carries a `rescue nil` modifier, so link failures are silent.
  def triplrMail &b
    m = Mail.read node; return unless m
    # fall back to a random identifier when the message carries no Message-ID
    id = m.message_id || m.resent_message_id || rand.to_s.sha2 # Message-ID
    resource = MessageURI[id]; e = resource.uri                # Message-URI
    # storage paths
    srcDir = resource.path.R; srcDir.mkdir # container
    srcFile = srcDir + 'this.msg'          # location
    # link to canonical location
    ln self, srcFile unless srcFile.e rescue nil
    yield e, DC+'identifier', id         # pre-web identifier
    yield e, DC+'cache', self + '*' # webized source file
    yield e, Type, R[SIOC+'MailMessage'] # RDF typetag

    # From
    from = []
    m.from.do{|f|f.justArray.map{|f|from.push f.to_utf8.downcase }} # queue for indexing
    m[:from].do{|fr|fr.addrs.map{|a|yield e, Creator, a.display_name||a.name}} # creator name
    m['X-Mailer'].do{|m|yield e, SIOC+'user_agent', m.to_s}

    # To
    to = []
    %w{to cc bcc resent_to}.map{|p|      # recipient fields
      m.send(p).justArray.map{|r|        # recipient
        to.push r.to_utf8.downcase }}    # queue for indexing
    m['X-BeenThere'].justArray.map{|r|to.push r.to_s} # anti-loop recipient
    m['List-Id'].do{|name|yield e, To, name.decoded.sub(/<[^>]+>/,'').gsub(/[<>&]/,'')} # mailinglist name

    # Subject
    subject = nil
    m.subject.do{|s|
      # the gsub block returns nil, so each [token] is removed from the subject after being emitted
      subject = s.to_utf8.gsub(/\[[^\]]+\]/){|l|
        yield e, Label, l[1..-2]; nil} # emit []-wrapped tokens as RDF labels
      yield e, Title, subject}

    # Date
    # a message without a Date header is stamped with the current time
    date = (m.date || Time.now).to_time.utc
    dstr = date.iso8601
    yield e, Date, dstr; #yield e, Mtime, date.to_i
    dpath = '/' + dstr[0..6].gsub('-','/') + '/msg/' # month container
    # NOTE(review): an address without '@' leaves domain nil and `dpath + domain` would raise - confirm inputs
    [*from,*to].map{|addr| # address
      user, domain = addr.split '@' # split address parts
      apath = dpath + domain + '/' + user + '/' # address container
      yield e, (from.member? addr) ? Creator : To, R[apath+'#'+user] # To/From triple
      if subject
        mpath = apath + (dstr[8..-1] + subject).gsub(/[^a-zA-Z0-9_]+/,'.')[0..96] # append time & subject
        mpath = mpath + (mpath[-1] == '.' ? '' : '.')  + 'msg' # append filetype extension
        mloc = mpath.R # index entry
        mloc.dir.mkdir # index container
        ln self, mloc unless mloc.e rescue nil # link to index
      end}

    # references
    %w{in_reply_to references}.map{|ref|
      m.send(ref).do{|rs|
        rs.justArray.map{|r|
          dest = MessageURI[r]
          yield e, SIOC+'reply_of', dest
          destDir = dest.path.R; destDir.mkdir; destFile = destDir+'this.msg'
          # bidirectional reference link
          rev = destDir + id.sha2 + '.msg'
          rel = srcDir + r.sha2 + '.msg'
          if !rel.e # link missing
            if destFile.e # exists, create link
              ln destFile, rel rescue nil
            else # point to message anyway in case it appears
              ln_s destFile, rel rescue nil
            end
          end
          ln srcFile, rev if !rev.e rescue nil}}}

    # HTML parts
    # the message itself is pushed onto its own part list so a single-part message is handled too
    htmlFiles, parts = m.all_parts.push(m).partition{|p|p.mime_type=='text/html'}
    htmlCount = 0
    htmlFiles.map{|p| # HTML file
      html = srcDir + "#{htmlCount}.html"  # file location
      yield e, DC+'hasFormat', html        # file pointer
      html.writeFile p.decoded  if !html.e # store
      htmlCount += 1 } # increment count

    # text parts
    parts.select{|p|
      (!p.mime_type || p.mime_type == 'text/plain') && # find text parts
        Mail::Encodings.defined?(p.body.encoding)      # ensure decoder is defined
    }.map{|p| # text part
      # represent part as RDF
      yield e, Content, (H p.decoded.to_utf8.lines.to_a.map{|l| # split lines
        l = l.chomp # strip any remaining [\n\r]
        if qp = l.match(/^((\s*[>|]\s*)+)(.*)/) # quoted line
          depth = (qp[1].scan /[>|]/).size # count > occurrences
          if qp[3].empty? # drop empty-string lines while inside quoted portion
            nil
          else # wrap quotes in <span>
            indent = "<span name='quote#{depth}'>&gt;</span>" # indentation marker with depth label
            {_: :span, class: :quote,
             c: [indent * depth,' ',
                 {_: :span, class: :quoted, c: qp[3].hrefs{|p,o|yield e, p, o}}]}
          end
        else # fresh line
          [l.hrefs{|p,o|yield e, p, o}]
        end}.compact.intersperse("\n"))} # join lines

    # message parts
    parts.select{|p|p.mime_type=='message/rfc822'}.map{|m|
      content = m.body.decoded                   # decode message-part
      f = srcDir + content.sha2 + '.inlined.msg' # message location
      f.writeFile content if !f.e                # store message
      f.triplrMail &b}                           # recursion on message-part

    # attachments
    m.attachments.select{|p|Mail::Encodings.defined?(p.body.encoding)}.map{|p|
      # an attachment without a usable filename gets a random name with an extension looked up from its MIME type
      name = p.filename.do{|f|f.to_utf8.do{|f|!f.empty? && f}} ||                           # explicit name
             (rand.to_s.sha2 + (Rack::Mime::MIME_TYPES.invert[p.mime_type] || '.bin').to_s) # generated name
      file = srcDir + name                     # file location
      file.writeFile p.body.decoded if !file.e # store
      yield e, SIOC+'attachment', file         # file pointer
      if p.main_type=='image'                  # image attachments
        yield e, Image, file                   # image link represented in RDF
        yield e, Content,                      # image link represented in HTML
          H({_: :a, href: file.uri, c: [{_: :img, src: file.uri}, p.filename]}) # render HTML
      end }
  end

  # Fetch every feed listed in this URI-list file; always returns nil
  def fetchFeeds
    feedResources = uris.map(&:R)
    feedResources.each(&:fetchFeed)
    nil
  end
  # Conditionally GET this feed and index it when its body changed.
  # The previous ETag, Last-Modified time and body are kept under /.cache/<sha2 of uri>/.
  # A stored ETag is sent as If-None-Match; otherwise a stored mtime is sent as If-Modified-Since.
  # Returns self when a new body was stored and indexed, nil otherwise (including on error,
  # which is reported on stdout).
  def fetchFeed
    updated = false
    head = {} # request header
    cache = R['/.cache/'+uri.sha2+'/'] # storage
    etag = cache + 'etag'      # cache etag URI
    priorEtag = nil            # cache etag value
    mtime = cache + 'mtime'    # cache mtime URI
    priorMtime = nil           # cache mtime value
    body = cache + 'body.atom' # cache body URI
    if etag.e
      priorEtag = etag.readFile
      head["If-None-Match"] = priorEtag unless priorEtag.empty?
    elsif mtime.e
      priorMtime = mtime.readFile.to_time
      head["If-Modified-Since"] = priorMtime.httpdate
    end
    begin # conditional GET
      # call open-uri directly: Kernel#open no longer accepts URLs on Ruby 3
      # and would run a command for a string starting with '|'
      OpenURI.open_uri(uri, head) do |response|
        curEtag = response.meta['etag']
        curMtime = response.last_modified || Time.now rescue Time.now
        etag.writeFile curEtag if curEtag && !curEtag.empty? && curEtag != priorEtag # new ETag value
        mtime.writeFile curMtime.iso8601 if curMtime != priorMtime # new Last-Modified value
        resp = response.read
        unless body.e && body.readFile == resp
          updated = true
          body.writeFile resp # update cache
          ('file:'+body.pathPOSIX).R.indexFeed :format => :feed, :base_uri => uri # run indexer
        end
      end
    rescue OpenURI::HTTPError => error
      msg = error.message
      puts [uri,msg].join("\t") unless msg.match(/304/) # warn on error unless it's just a cache-hit (304)
    end
    updated ? self : nil
  rescue StandardError => e # not Exception: let Interrupt/SystemExit propagate
    puts uri, e.class, e.message
  end
  # getFeed is a second name for fetchFeed
  alias_method :getFeed, :fetchFeed
  # URIs of the <link rel=alternate> elements in this document, each resolved with #join
  def feeds
    alternates = nokogiri.css 'link[rel=alternate]'
    alternates.map do |link|
      join link.attr(:href)
    end
  end

  # Load this feed into an RDF repository and write each named graph that has a
  # timestamp to a Turtle file at /<time path><slug>.ttl, unless that file already exists.
  # options - passed through to RDF::Repository.load
  # The time path is the graph's first Date value with '-', 'T' and the first ':' turned into '/';
  # the slug is derived from the graph name. A DC cache triple pointing at the
  # suffix-stripped document is added to the graph before it is written.
  # Returns self, or nil after an error (which is reported on stdout).
  def indexFeed options = {}
    g = RDF::Repository.load self, options
    g.each_graph.map{|graph|
      graph.query(RDF::Query::Pattern.new(:s,R[R::Date],:o)).first_value.do{|t| # find timestamp
        time = t.gsub(/[-T]/,'/').sub(':','/').sub /(.00.00|Z)$/, ''
        slug = (graph.name.to_s.sub(/https?:\/\//,'.').gsub(/[\W_]/,'..').sub(/\d{12,}/,'')+'.').gsub(/\.+/,'.')[0..127].sub(/\.$/,'')
        doc =  R["/#{time}#{slug}.ttl"]
        unless doc.e
          doc.dir.mkdir
          cacheBase = doc.stripDoc
          graph << RDF::Statement.new(graph.name, R[DC+'cache'], cacheBase)
          RDF::Writer.open(doc.pathPOSIX){|f|f << graph}
          puts cacheBase
        end
        true}}
    self
  rescue StandardError => e # not Exception: let Interrupt/SystemExit propagate
    puts uri, e.class, e.message
  end

  # example CSS-selector triplr. see Scraper repo
  # Each 'div.tweet > div.content' element becomes a Tweet with date, creator,
  # recipient, label, outgoing links and cleaned-up HTML content.
  def triplrTwitter
    origin = 'https://twitter.com'
    nokogiri.css('div.tweet > div.content').map do |tweet|
      subject = origin + tweet.css('.js-permalink').attr('href') # subject URI
      handle = tweet.css('.username b')[0].inner_text
      author = R[origin+'/'+handle]
      posted = Time.at(tweet.css('[data-time]')[0].attr('data-time').to_i).iso8601
      yield subject, Type, R[SIOC+'Tweet']
      yield subject, Date, posted
      yield subject, Creator, author
      yield subject, To, (origin+'/#twitter').R
      yield subject, Label, handle
      body = tweet.css('.tweet-text')[0]
      body.css('a').map do |anchor| # resolve URIs relative to origin
        anchor.set_attribute('href', origin + (anchor.attr 'href')) if (anchor.attr 'href').match /^\//
        yield subject, DC+'link', R[anchor.attr 'href']
      end
      yield subject, Content, StripHTML[body.inner_html].gsub(/<\/?span[^>]*>/,'').gsub(/\n/,'').gsub(/\s+/,' ')
    end
  end

  # Collect the triples from #triplrTwitter into a tree keyed by tweet URI, then store
  # each dated tweet as a JSON-cache file at /<time path><slug>.e unless it already exists.
  # The URI of every newly stored tweet is printed.
  def indexTweets
    tweets = {}
    triplrTwitter do |s, p, o|
      tweets[s] ||= {'uri' => s}
      tweets[s][p] ||= []
      tweets[s][p].push o
    end
    tweets.map do |tweetURI, resource|
      resource[Date].do do |stamps|
        slug = tweetURI.sub(/https?/,'.').gsub(/\W/,'.').gsub(/\.+/,'.')
        time = stamps[0].to_s.gsub(/[-T]/,'/').sub(':','/').sub(/(.00.00|Z)$/, '')
        doc = "/#{time}#{slug}.e".R
        unless doc.e
          puts tweetURI
          doc.writeFile({tweetURI => resource}.to_json)
        end
      end
    end
  end

  # Reader for JSON-cache format
  # Registers the content type 'application/json+rdf' (extension .e) with RDF.rb and
  # reads the {subject => {'uri' => subject, predicate => [objects]}} tree written by #toJSON.
  module Format
    class Format < RDF::Format
      content_type     'application/json+rdf', :extension => :e
      content_encoding 'utf-8'
      reader { R::Format::Reader }
    end
    class Reader < RDF::Reader
      format Format
      # input   - an object responding to #read, or anything else (converted with to_s and parsed as JSON text)
      # options - :base_uri is kept in @base and used to resolve subject and object URIs
      # The whole document is parsed eagerly. An optional block is instance_eval'd when it
      # takes no arguments, otherwise called with the reader.
      # NOTE(review): RDF::Reader#initialize is not called via super - confirm that is intended
      def initialize(input = $stdin, options = {}, &block)
        @graph = JSON.parse (input.respond_to?(:read) ? input : StringIO.new(input.to_s)).read
        @base = options[:base_uri]
        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
        nil
      end
      # Call fn with one RDF::Statement per (subject, predicate, object) in the tree.
      # The 'uri' key of each resource is skipped. Hash objects are treated as
      # resources and resolved via their 'uri'; everything else becomes a literal,
      # typed as XMLLiteral when the predicate is Content.
      # NOTE(review): @base.join is called unconditionally, so a :base_uri option appears to be required
      def each_statement &fn
        @graph.map{|s,r|
          r.map{|p,o|
            o.justArray.map{|o|
              fn.call RDF::Statement.new(@base.join(s), RDF::URI(p),
                        o.class==Hash ? @base.join(o['uri']) : (l = RDF::Literal o
                                                              l.datatype=RDF.XMLLiteral if p == Content
                                                              l))} unless p=='uri'}}
      end
      # Triple-wise iteration, built on each_statement
      def each_triple &block; each_statement{|s| block.call *s.to_triple} end
    end
  end

  # Reader for Atom and RSS
  module Feed
    # Registers 'application/atom+xml' (extension .atom) with RDF.rb and names R::Feed::Reader as its reader
    class Format < RDF::Format
      content_type     'application/atom+xml', :extension => :atom
      content_encoding 'utf-8'
      reader { R::Feed::Reader }
    end
    class Reader < RDF::Reader
      format Format
      # input   - an object responding to #read, or anything else (read from its to_s)
      # options - :base_uri is kept in @base for resolving relative URIs
      # The document text is read eagerly into @doc. An optional block is instance_eval'd
      # when it takes no arguments, otherwise called with the reader.
      def initialize(input = $stdin, options = {}, &block)
        source = input.respond_to?(:read) ? input : StringIO.new(input.to_s)
        @doc = source.read.utf8
        @base = options[:base_uri]
        if block_given?
          if block.arity == 0
            instance_eval(&block)
          else
            block.call(self)
          end
        end
        nil
      end
      # Triple-wise iteration, built on each_statement
      def each_triple &block
        each_statement do |statement|
          block.call(*statement.to_triple)
        end
      end
      # Call fn with one RDF::Statement per triple of the feed.
      # Triples originate in rawTriples and pass through normalizePredicates,
      # normalizeDates and resolveURIs, in that order, before reaching this block.
      # Objects that are R or RDF::URI instances are used as is; anything else becomes a
      # literal: Content goes through R::StripHTML and is typed XMLLiteral, other values
      # have their tags replaced by spaces. Each statement's graph name is its subject.
      def each_statement &fn # triples flow (left ← right) across stream-transformers
        resolveURIs(:normalizeDates, :normalizePredicates,:rawTriples){|s,p,o|
          fn.call RDF::Statement.new(s.R, p.R,
                                     (o.class == R || o.class == RDF::URI) ? o : (l = RDF::Literal (if p == Content
                                                                             R::StripHTML[o]
                                                                           else
                                                                             o.gsub(/<[^>]*>/,' ')
                                                                           end)
                                                         l.datatype=RDF.XMLLiteral if p == Content
                                                         l), :graph_name => s.R)}
      end
      # Stream transformer: for String Content, parse the HTML fragment, emit an Image triple
      # per <img src>, resolve every <a href> against the subject (rewriting the attribute and
      # emitting a link triple, plus an Image triple for gif/jpg/png/webp targets), then emit
      # the rewritten fragment as XHTML. All other triples pass through untouched.
      # f - method name and arguments of the upstream triple source, invoked with send
      def resolveURIs *f
        send(*f) do |s, p, o|
          next yield(s, p, o) unless p==Content && o.class==String
          fragment = Nokogiri::HTML.fragment o
          fragment.css('img').map do |img|
            (img.attr 'src').do do |src|
              yield s, Image, src.R
            end
          end
          fragment.css('a').map do |anchor|
            (anchor.attr 'href').do do |href|
              target = s.R.join href
              anchor.set_attribute 'href', target
              yield s, DC+'link', target
              yield s, Image, target if %w{gif jpg png webp}.member? target.R.ext.downcase
            end
          end
          yield s, p, fragment.to_xhtml
        end
      end
      # Stream transformer: rename feed-specific predicates to the common vocabulary;
      # predicates without a mapping pass through unchanged.
      # f - method name and arguments of the upstream triple source, invoked with send
      def normalizePredicates *f
        renames = {
          DCe+'subject' => Title,
          DCe+'type' => Type,
          RSS+'title' => Title,
          RSS+'description' => Content,
          RSS+'encoded' => Content,
          RSS+'modules/slash/comments' => SIOC+'num_replies',
          RSS+'modules/content/encoded' => Content,
          RSS+'category' => Label,
          RSS+'source' => DC+'source',
          Harvard+'author' => Creator,
          Harvard+'subtitle' => Title,
          Harvard+'WPID' => Label,
          Harvard+'affiliation' => Creator,
          Podcast+'keywords' => Label,
          Podcast+'subtitle' => Title,
          Podcast+'author' => Creator,
          Atom+'displaycategories' => Label,
          'http://newsoffice.mit.edu/ns/tags' => Label,
          Atom+'content' => Content,
          Atom+'summary' => Content,
          Atom+'enclosure' => SIOC+'attachment',
          Atom+'title' => Title,
        }
        send(*f) do |s, p, o|
          yield s, (renames[p] || p), o
        end
      end
      # Stream transformer: triples whose predicate is one of the known date properties are
      # re-emitted with predicate Date and the value parsed and formatted as UTC ISO 8601;
      # all other triples pass through unchanged.
      # f - method name and arguments of the upstream triple source, invoked with send
      def normalizeDates *f
        dateProperties = {
          'CreationDate' => true,
          'Date' => true,
          RSS+'pubDate' => true,
          Date => true,
          DCe+'date' => true,
          Atom+'published' => true,
          Atom+'updated' => true
        }
        send(*f) do |s, p, o|
          if dateProperties[p]
            yield s, Date, Time.parse(o).utc.iso8601
          else
            yield s, p, o
          end
        end
      end
      # Regex-based extraction of triples from the feed text in @doc (no XML parser is used).
      # For every <item>/<entry> a subject URI is chosen, the post is typed, media links
      # and authors are emitted, and every remaining element becomes a generic triple
      # whose predicate is the element's namespace URI + local name.
      def rawTriples
        # elements
        reHead = /<(rdf|rss|feed)([^>]+)/i
        reXMLns = /xmlns:?([a-z0-9]+)?=["']?([^'">\s]+)/
        reItem = %r{<(?<ns>rss:|atom:)?(?<tag>item|entry)(?<attrs>[\s][^>]*)?>(?<inner>.*?)</\k<ns>?\k<tag>>}mi
        reElement = %r{<([a-z0-9]+:)?([a-z]+)([\s][^>]*)?>(.*?)</\1?\2>}mi
        # identifiers
        reRDF = /about=["']?([^'">\s]+)/              # RDF @about
        reLink = /<link>([^<]+)/                      # <link> element
        reLinkCData = /<link><\!\[CDATA\[([^\]]+)/    # <link> CDATA block
        reLinkHref = /<link[^>]+rel=["']?alternate["']?[^>]+href=["']?([^'">\s]+)/ # <link> @href @rel=alternate
        reLinkRel = /<link[^>]+href=["']?([^'">\s]+)/ # <link> @href
        reId = /<(?:gu)?id[^>]*>([^<]+)/              # <id> element
        # media links
        reAttach = %r{<(link|enclosure|media)([^>]+)>}mi
        reSrc = /(href|url|src)=['"]?([^'">\s]+)/
        reRel = /rel=['"]?([^'">\s]+)/
        commentRe = /\/comments\//
        x = {} # XML-namespace table
        # collect prefix -> namespace URI from the root element's xmlns attributes;
        # the default namespace is stored under the nil key
        head = @doc.match(reHead)
        head && head[2] && head[2].scan(reXMLns){|m|
          prefix = m[0]
          base = m[1]
          base = base + '#' unless %w{/ #}.member? base [-1]
          x[prefix] = base}
        @doc.scan(reItem){|m|
          # scan yields the named groups in order: ns, tag, attrs, inner
          attrs = m[2]
          inner = m[3]
          # find post identifier
          # first match wins: rdf:about, <link> text, <link> CDATA, alternate link href, any link href, <id>/<guid>
          u = (attrs.do{|a|a.match(reRDF)} || inner.match(reLink) || inner.match(reLinkCData) || inner.match(reLinkHref) || inner.match(reLinkRel) || inner.match(reId)).do{|s|s[1]}
          if u
            unless u.match /^http/ # resolve relative URIs
              u = (URI.join @base, u).to_s
            end
            resource = u.R
            if u.match commentRe
              # a URI containing /comments/ is a Post addressed to the URI preceding that segment
              yield u, R::Type, R[R::Post]
              yield u, R::To, R[resource.uri.match(commentRe).pre_match]
            else
              yield u, Type, R[SIOC+'BlogPost']
              blogs = [resource.join('/')]
              blogs.push @base.R.join('/') if @base.R.host != resource.host # reblog
              blogs.map{|blog| yield u, R::To, blog}
            end
            inner.scan(reAttach){|e| # media links
              e[1].match(reSrc).do{|url|
                rel = e[1].match reRel
                # only links carrying a rel attribute are emitted
                if rel
                  o = url[2].R
                  p = case o.ext.downcase
                      when 'jpg'
                        R::Image
                      when 'png'
                        R::Image
                      else
                        R::Atom + rel[1]
                      end
                  yield u, p, o
                end}}
            inner.scan(reElement){|e| # elements
              # e is [prefix with trailing ':', local name, attributes, inner text]
              p = (x[e[0] && e[0].chop]||R::RSS) + e[1]                  # expand property-name
              if [Atom+'id',RSS+'link',RSS+'guid',Atom+'link'].member? p # custom element-type handlers
                # used in subject URI search
              elsif [Atom+'author', RSS+'author', RSS+'creator', DCe+'creator'].member? p # author
                uri = e[3].match /<uri>([^<]+)</
                name = e[3].match /<name>([^<]+)</
                # a bare author value is used directly: as a resource if it looks like a path or http URI, else as text
                yield u, Creator, e[3].do{|o|o.match(/\A(\/|http)[\S]+\Z/) ? o.R : o } unless name||uri
                yield u, Creator, name[1] if name
                yield u, Creator, uri[1].R if uri
              else # generic element
                # unwrap CDATA, keep markup as is, unescape entities in plain text
                yield u,p,e[3].do{|o|
                  case o
                  when /^\s*<\!\[CDATA/m
                    o.sub /^\s*<\!\[CDATA\[(.*?)\]\]>\s*$/m,'\1'
                  when /</m
                    o
                  else
                    CGI.unescapeHTML o
                  end
                }.do{|o|o.match(/\A(\/|http)[\S]+\Z/) ? o.R : o }
              end
            }
          end}
      end
    end
  end

  # writer for Atom and RSS
  # d - resource tree {uri => {predicate => [values]}}
  # e - the requested resource; its uri becomes the feed id, title and self link
  # Builds the element tree for an Atom feed and renders it with H.
  FEED = lambda do |d, e|
    preamble = [{_: :id, c: e.uri},
                {_: :title, c: "Atom feed for " + e.uri},
                {_: :link, rel: :self, href: e.uri},
                {_: :updated, c: Time.now.iso8601}]
    entries = d.map do |u, resource|
      {_: :entry,
       c: [{_: :id, c: u},
           {_: :link, href: u},
           resource[Date].do { |dates| {_: :updated, c: dates[0]} },
           resource[Title].do { |titles| {_: :title, c: titles} },
           resource[Creator].do { |creators| {_: :author, c: creators[0]} },
           {_: :content, type: :xhtml,
            c: {xmlns:"http://www.w3.org/1999/xhtml",
                c: resource[Content]}}]}
    end
    H(['<?xml version="1.0" encoding="utf-8"?>',
       {_: :feed, xmlns: 'http://www.w3.org/2005/Atom',
        c: [*preamble, entries]}])
  end

end

module Redcarpet
  module Render
    class Pygment < HTML
      # Render a fenced code block.
      # code - source text of the block
      # lang - language tag given on the fence, or nil when there was none
      # With a language, the code is piped through `pygmentize` and its HTML output is returned.
      # Without one, the code is HTML-escaped and wrapped in <pre><code>, so markup inside an
      # untagged code block is shown literally instead of being injected into the page.
      def block_code(code, lang)
        if lang
          IO.popen("pygmentize -l #{lang.downcase.sh} -f html",'r+'){|p|
            p.puts code
            p.close_write
            p.read
          }
        else
          "<pre><code>#{CGI.escapeHTML code}</code></pre>\n"
        end
      end
    end
  end
end
2017-09-24T22:56:09+00:00 30804
ruby 2017-09-24T22:56:09+00:00 9
HTTP.rb
# coding: utf-8
class R

  # Rack entry point.
  #
  # e - Rack environment Hash; 'REQUEST_METHOD' and 'REQUEST_PATH' are read,
  #     'uri', :Response and :Links are written before dispatch
  #
  # Returns a Rack response triple [status, headers, body]:
  #   405 for methods other than HEAD/GET, 404 for paths ending in .php,
  #   otherwise the result of the resource's HEAD/GET method, or a 500 page
  #   showing the error class, message and backtrace when dispatch raises.
  def R.call e
    return [405,{},[]] unless %w{HEAD GET}.member? e['REQUEST_METHOD']
    return [404,{},[]] if e['REQUEST_PATH'].match(/\.php$/i)
    rawpath = e['REQUEST_PATH'].utf8.gsub /[\/]+/, '/'   # collapse sequential /s
    path = Pathname.new(rawpath).expand_path.to_s        # evaluate path
    path += '/' if path[-1] != '/' && rawpath[-1] == '/' # preserve trailing-slash
    resource = path.R; e['uri'] = resource.uri           # resource URI
    e[:Response]={}; e[:Links]={}                        # response header
    resource.setEnv(e).send e['REQUEST_METHOD']          # call method
  # application and load/syntax errors become an error page; Interrupt, SystemExit
  # and NoMemoryError are deliberately left to propagate to the server
  rescue StandardError, ScriptError => x
    msg = [x.class,x.message,x.backtrace].join "\n"
    [500,{'Content-Type' => 'text/html'},
     ["<html><head><style>body {background-color:#222; font-size:1.2em; text-align:center}\npre {text-align:left; display:inline-block; background-color:#000; color:#fff; font-weight:bold; border-radius:.6em; padding:1em}\n.number {color:#0f0; font-weight:normal; font-size:1.1em}</style></head><body><pre>",
      # escape the message, then wrap digit runs in a highlight span
      msg.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;').gsub(/([0-9\.]+)/,'<span class=number>\1</span>'),
      '</pre></body></html>']]
  end

  # HEAD: run GET, keep its status and headers, answer with an empty body
  def HEAD
    self.GET.do{|status, headers, _body|
      [status, headers, []]}
  end

  # GET handler for this resource.
  #
  # Files are delegated to fileGET. '/t' and '/feed' redirect (303) to the
  # current hour's container. For everything else a set of matching nodes is
  # assembled (find/grep results, container index or children, or a glob),
  # Link/Content-Type/ETag headers are set, and the body is produced lazily
  # through condResponse.
  #
  # Returns a Rack response triple [status, headers, body].
  def GET
    return fileGET if file?
    qs = @r['QUERY_STRING'] && !@r['QUERY_STRING'].empty? && ('?' + @r['QUERY_STRING']) || ''
    return [303,@r[:Response].update({'Location'=> Time.now.strftime('/%Y/%m/%d/%H/')+(qs.empty? ? '?head' : qs)}),[]] if path=='/t'
    return [303,@r[:Response].update({'Location'=> Time.now.strftime('/%Y/%m/%d/%H/?feed')}),[]] if path=='/feed'

    # time pointers: leading all-digit path segments are read as Y[/m[/d[/H]]]
    parts = path[1..-1].split '/'
    dp = []; dp.push parts.shift.to_i while parts[0] && parts[0].match(/^[0-9]+$/)
    n = nil; p = nil # next / previous container paths
    case dp.length
    when 1 # Y
      year = dp[0]
      n = '/' + (year + 1).to_s
      p = '/' + (year - 1).to_s
    when 2 # Y-m
      year = dp[0]
      m = dp[1]
      # months are zero-padded to two digits; the January rollover is written as a
      # literal '01' (interpolating the integer literal 01 produced '1')
      n = m >= 12 ? "/#{year + 1}/01" : "/#{year}/#{'%02d' % (m + 1)}"
      p = m <=  1 ? "/#{year - 1}/12" : "/#{year}/#{'%02d' % (m - 1)}"
    when 3 # Y-m-d
      day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil # invalid dates get no pointers
      if day
        p = (day-1).strftime('/%Y/%m/%d')
        n = (day+1).strftime('/%Y/%m/%d')
      end
    when 4 # Y-m-d-H
      day = ::Date.parse "#{dp[0]}-#{dp[1]}-#{dp[2]}" rescue nil
      if day
        hour = dp[3]
        # hour 0 and hour 23 roll over to the adjacent day
        p = hour <=  0 ? (day - 1).strftime('/%Y/%m/%d/23') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour-1)))
        n = hour >= 23 ? (day + 1).strftime('/%Y/%m/%d/00') : (day.strftime('/%Y/%m/%d/')+('%02d' % (hour+1)))
      end
    end
    sl = parts.empty? ? '' : (path[-1] == '/' ? '/' : '') # keep trailing slash of the remaining path
    # prev/next are only advertised when the target container exists
    @r[:Links][:prev] = p + '/' + parts.join('/') + sl + qs if p && R[p].e
    @r[:Links][:next] = n + '/' + parts.join('/') + sl + qs if n && R[n].e
    @r[:Links][:up] = dirname + (dirname == '/' ? '' : '/') + qs

    set = (if node.directory?
           if (q.has_key? 'find') && path!='/' # FIND(1) nodes
             find q['find']
           elsif (q.has_key? 'q') && path!='/' # GREP(1) nodes
             grep q['q']
           else # basic container
             if uri[-1] == '/' # inside container
               if q.has_key? 'head' # summary view
                 q_ = q.dup; q_.delete 'head' # link to full view
                 @r[:Links][:down] = path + R.qs(q_) # down to children
               end
               (self+'index.*').glob || [self, children] # inlined children
             else # outside container
               @r[:Links][:down] = path + '/' + qs # down to children
               self # just the container
             end
           end
          else # arbitrary or extension-wildcard glob
            (match(/\*/) ? self : (self+'.*')).glob
           end).justArray.flatten.compact.select &:exist?

    return notfound if !set || set.empty?

    @r[:Response].update({'Link' => @r[:Links].map{|type,uri|"<#{uri}>; rel=#{type}"}.intersperse(', ').join}) unless @r[:Links].empty?
    # ETag covers every node's identity and mtime plus the negotiated format
    @r[:Response].update({'Content-Type' => format, 'ETag' => [set.sort.map{|r|[r,r.m]}, format].join.sha2})
    condResponse ->{ # body
      if set.size == 1 && set[0].mime == format
        set[0] # static body
      else # dynamic body
        if format == 'text/html'
          HTML[R.load(set),self]
        elsif format == 'application/atom+xml'
          FEED[R.load(set),self]
        else
          load(set).dump (RDF::Writer.for :content_type => format).to_sym, :base_uri => self, :standard_prefixes => true
        end
      end}
  end

  # Search file contents beneath this resource with grep(1).
  # q - query String; tokenized by R.tokens
  # Two to four terms are ANDed in any order by chaining greps over the
  # NUL-separated file list; any other count is matched as one pattern with
  # the terms in submitted order. Returns at most 1024 matching files as R.
  def grep q
    terms = R.tokens q
    cmd = if (2..4).include? terms.size
            first, *others = terms
            stages = ["grep -rilZ #{first.sh} #{sh}"]
            others.each_with_index{|term, i|
              # every stage but the last keeps NUL-separated output for the next xargs
              flags = i == others.size - 1 ? '-il' : '-ilZ'
              stages.push "xargs -0 grep #{flags} #{term.sh}"}
            stages.join ' | '
          else
            "grep -ril #{terms.join('.*').sh} #{sh}"
          end
    `#{cmd} | head -n 1024`.lines.map{|hit| R.fromPOSIX hit.chomp}
  end

  # JSON loader: merge a set of nodes into one tree-graph
  # set - collection of nodes responding to #isRDF
  # Nodes where #isRDF is true are read through RDF::Graph#load; the rest are
  # read as JSON from the file named by #toJSON.
  # Returns a Hash: subject URI String => {'uri' => subject, predicate => [objects]},
  # with duplicate objects under the same predicate dropped.
  def R.load set
    graph = RDF::Graph.new # input graph
    g = {}                 # output tree
    rdf,nonRDF = set.partition &:isRDF # partition node types
    # load RDF formats
    rdf.map{|n|graph.load n.pathPOSIX, :base_uri => n}
    # RDF triples to tree
    graph.each_triple{|s,p,o| # each triple
      s = s.to_s; p = p.to_s # subject/predicate URI keys
      o = [RDF::Node, RDF::URI, R].member?(o.class) ? o.R : o.value # resources become R, everything else its #value
      g[s]||={'uri'=>s}; g[s][p]||=[]; g[s][p].push o unless g[s][p].member? o} # add, skipping duplicates
    # load JSON format
    nonRDF.map{|n| (JSON.parse n.toJSON.readFile).map{|s,re| # each subject
        re.map{|p,o| # each predicate/object
          o.justArray.map{|o| # each triple
            o = o.R if o.class==Hash # Hash objects are resource references; cast to R
            g[s]||={'uri'=>s}; g[s][p]||=[]; g[s][p].push o unless g[s][p].member? o} unless p == 'uri' }}} # add; the 'uri' field itself is not a triple
    g # tree-graph
  end

  # RDF loader: read the #toRDF form of every node in the set into a single RDF::Graph
  # set - collection of nodes; each is loaded with its #stripDoc URI as base URI
  # Returns the populated RDF::Graph
  def load set
    set.each_with_object(RDF::Graph.new){|member, graph|
      graph.load member.toRDF.pathPOSIX, :base_uri => member.stripDoc}
  end


  # GET for a plain file.
  # Sets Content-Type and an ETag derived from mtime and size, adds
  # 'Cache-Control: no-transform' for audio/image/video, and serves either the
  # file itself or, for ?thumb on mp4/mkv/png/jpg, a thumbnail that is
  # generated on first request.
  # Returns a Rack response triple.
  def fileGET
    headers = @r[:Response]
    headers.update({'Content-Type' => mime, 'ETag' => [m,size].join.sha2})
    headers.update({'Cache-Control' => 'no-transform'}) if mime.match /^(audio|image|video)/

    # ordinary request: serve the file
    return condResponse unless q.has_key?('thumb') && ext.match(/(mp4|mkv|png|jpg)/i)

    # thumbnail request: create the thumbnail file if it is not there yet
    unless thumb.e
      generator = if mime.match(/^video/)
                    "ffmpegthumbnailer -s 256 -i #{sh} -o #{thumb.sh}"
                  else
                    "gm convert #{sh} -thumbnail \"256x256\" #{thumb.sh}"
                  end
      `#{generator}`
    end
    # generation may have failed, so existence is re-checked before serving
    thumb.e && thumb.setEnv(@r).condResponse || notfound
  end

  # Conditional response.
  # body - optional callable producing the body; only invoked when a full
  #        response is needed. Without it this resource itself is the body.
  # Returns [304, {}, []] when the request's If-None-Match lists the response
  # ETag; otherwise a Rack triple. A body of class R is served as a file
  # through Rack::File with our response headers merged in.
  def condResponse body=nil
    etags = @r['HTTP_IF_NONE_MATCH'].do{|header| header.strip.split(/\s*,\s*/)}
    return [304, {}, []] if etags && etags.include?(@r[:Response]['ETag'])

    content = body ? body.call : self
    # in-memory body
    return [(@r[:Status]||200), @r[:Response], [content]] unless content.class == R

    # file reference: let Rack::File serve it, then add our headers
    served = (Rack::File.new nil).serving((Rack::Request.new @r), content.pathPOSIX)
    served.do{|status, headers, stream| [status, headers.update(@r[:Response]), stream]}
  end

  # 404 response: an HTML page rendered by the HTML view from an empty graph
  def notfound
    [404,{'Content-Type' => 'text/html'},[HTML[{},self]]]
  end

  # Parse the Accept request header into a q-value index (memoized in @accept).
  #
  # Returns a Hash of Float q-value => Array of MIME-type Strings in header
  # order; {} when the header is absent.
  # Empty media ranges (stray commas) are skipped rather than raising, and the
  # weight is taken from the parameter actually named q, so other parameters
  # such as ;level=2 are no longer mistaken for a q-value.
  def accept
    @accept ||= (
      index = {}
      (@r['HTTP_ACCEPT'] || '').split(',').each{|range| # each media range
        mime, *params = range.split(';').map(&:strip)   # split MIME from its parameters
        next if mime.nil? || mime.empty?
        qparam = params.find{|param| param.match(/\Aq\s*=/i)}
        qval = qparam ? qparam.split('=', 2)[1].to_f : 1.0 # q || default
        (index[qval] ||= []).push mime}                  # append
      index)
  end

  # Query arguments of the request, memoized in @q.
  # Returns a Hash of lowercased, URL-decoded key => URL-decoded value
  # (nil for a key given without '='); {} when there is no query string.
  def q
    @q ||= (@r['QUERY_STRING'] || '').split(/&/).each_with_object({}){|pair, args|
      key, value = pair.split(/=/,2).map{|part| CGI.unescape part}
      args[(key||'').downcase] = value}
  end

  # {k: v} -> query-string
  # h - Hash of argument name => value; only the first element of an Array
  #     value is used, and a nil/false value gives an empty value
  # Returns a String starting with '?' with values URL-escaped
  def R.qs h
    pairs = h.map{|key, value|
      encoded = value ? CGI.escape([*value][0].to_s) : ''
      "#{key}=#{encoded}"}
    '?' + pairs.join('&')
  end

  # response MIME type chosen by selectFormat, memoized in @format
  def format; @format ||= selectFormat end

  # Choose the response MIME type.
  # Precedence: a 'feed' query arg forces Atom; then the first Accept entry,
  # highest q-value first, that either has an RDF::Writer or is listed in
  # Writable; otherwise HTML.
  def selectFormat
    return 'application/atom+xml' if q.has_key?('feed')

    preferences = accept
    preferences.keys.sort.reverse.each{|qval| # highest qval first
      preferences[qval].each{|mime|           # serializability check
        return mime if RDF::Writer.for(:content_type => mime) || Writable.member?(mime)}}

    'text/html'
  end
  
  end
2017-09-19T08:57:27+00:00 8709
HTML.rb
# coding: utf-8
# Render a nested data structure as an HTML String.
#   String - emitted as-is (treated as markup)
#   Hash   - an element: :_ is the tag name (default div), :c the children,
#            every other key an attribute
#   Array  - each member rendered in order and concatenated
#   R      - a hyperlink to the resource, labelled with its #label
#   nil    - empty string
#   other  - #to_s, HTML-escaped
def H x
  case x
  when String
    x
  when Hash # element
    tag = (x[:_] || 'div').to_s
    void = [:img, :input, :link, :meta].member? x[:_]
    # quote and angle brackets in attribute values are percent-encoded
    encoding = {"'"=>'%27', '>'=>'%3E', '<'=>'%3C'}
    attributes = (x.keys - [:_,:c]).map{|attr|
      value = x[attr].to_s.chars.map{|ch| encoding[ch] || ch}.join
      " #{attr}='#{value}'"}.join
    opener = "<#{tag}#{attributes}#{void ? '/' : ''}>"
    closer = void ? '' : "</#{tag}>" # void elements have no closing tag
    opener + H(x[:c]) + closer
  when Array # sequential structure
    x.map{|member| H member}.join
  when R # <a>
    H({_: :a, href: x.uri, c: x.label})
  when NilClass
    ''
  else
    CGI.escapeHTML x.to_s
  end
end

class String
  # cast to a resource reference
  def R; R.new self end
  # Scan for HTTP URIs in the string and return it as HTML with them hyperlinked.
  # example: demo on the site (https://demohere) and source-code at https://sourcehere.
  # [,.] only match mid-URI, opening ( required for ) capture, <> wrapping is stripped
  # An optional block receives (predicate, resource) for each URI found:
  # R::Image for URIs ending in an image suffix, R::DC+'link' for the rest.
  # Text around the URIs is HTML-escaped. On any exception the error is
  # printed and "" is returned for the portion being processed.
  def hrefs &b
    # split at the first URI: text before it, the URI, and the remaining tail
    pre,link,post = self.partition /(https?:\/\/(\([^)>\s]*\)|[,.]\S|[^\s),.”\'\"<>\]])+)/
    u = link.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') # escape URI
    pre.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;') +    # escape pre-match
      (link.empty? && '' || '<a href="' + u + '">' + # hyperlink
       (if u.match(/(gif|jpg|jpeg|jpg:large|png|webp)$/i) # image?
        yield(R::Image,u.R) if b # emit image
        "<img src='#{u}'/>"           # inline image
       else
         yield(R::DC+'link',u.R) if b # emit link
         u.sub(/^https?.../,'')       # innertext
        end) + '</a>') +
      (post.empty? && '' || post.hrefs(&b)) # process post-match tail
  # NOTE(review): catches Exception rather than StandardError; with the recursion
  # on the tail above this presumably also covers SystemStackError - confirm before narrowing
  rescue Exception => x
    puts [x.class,x.message,self[0..127]].join(" ")
    ""
  end
  # hex SHA2 digest of the string
  def sha2; Digest::SHA2.hexdigest self end
  # transcode to UTF-8, replacing invalid and undefined characters with '?'
  def to_utf8; encode('UTF-8', undef: :replace, invalid: :replace, replace: '?') end
  # relabel the bytes as UTF-8 without transcoding (mutates the receiver)
  def utf8; force_encoding 'UTF-8' end
  # shell-escaped form, for interpolation into command lines
  def sh; Shellwords.escape self end
end

class R

  # Fetch this resource's URI and parse the response as an HTML document.
  # The block form of open closes the handle once the content has been read;
  # previously the opened handle was never closed.
  # Returns the document produced by Nokogiri::HTML.parse
  def nokogiri
    Nokogiri::HTML.parse open(uri, &:read)
  end

  # Sanitize an HTML fragment.
  # body     - HTML String
  # loseTags - tag names whose elements are removed entirely (nil/false to keep all)
  # keepAttr - attribute names to retain; all others are unlinked (nil/false to keep all)
  # Returns the fragment serialized as unindented XHTML
  StripHTML = -> body, loseTags=%w{iframe script style}, keepAttr=%w{alt href rel src title type} {
    fragment = Nokogiri::HTML.fragment body
    if loseTags
      loseTags.each{|tag| fragment.css(tag).remove}
    end
    if keepAttr
      fragment.traverse{|element|
        element.attribute_nodes.each{|attribute|
          attribute.unlink unless keepAttr.member? attribute.name}}
    end
    fragment.to_xhtml(:indent => 0)}

  # HTML page view.
  # graph - tree-graph Hash (subject URI => resource fields)
  # re    - the requested resource; its env supplies :Links and receives :title
  # Returns the full document as a String. When a 'q' query arg is present
  # (outside '/', on a non-empty graph) the graph is first filtered and
  # highlighted in place by Grep. An empty graph renders a large 404.
  HTML = -> graph, re { e=re.env
    # page title: Title of the '#this' subject of the requested path, if any
    e[:title] = graph[re.path+'#this'].do{|r|r[Title].justArray[0]}
    re.path!='/' && !graph.empty? && re.q['q'].do{|q|Grep[graph,q]}
    # tree-graph -> HTML-Ruby -> HTML-String
    # navigation arrows, present only when the corresponding Link was set
    upPage = e[:Links][:up].do{|u|[{_: :a, c: '&#9650;', id: :Up, rel: :up, href: (CGI.escapeHTML u.to_s)},'<br clear=all>']} unless re.path=='/'
    prevPage = e[:Links][:prev].do{|p|{_: :a, c: '&#9664;', rel: :prev, href: (CGI.escapeHTML p.to_s)}}
    nextPage = e[:Links][:next].do{|n|{_: :a, c: '&#9654;', rel: :next, href: (CGI.escapeHTML n.to_s)}}
    downPage = e[:Links][:down].do{|d|['<br clear=all>',{_: :a, c: '&#9660;', id: :Down, rel: :down, href: (CGI.escapeHTML d.to_s)}]}
    # a graph whose subjects all end in jpg/png is shown as a gallery, anything else as a table
    images = graph.keys.grep /(jpg|png)$/i
    template = images.size==graph.keys.size ? Gallery : TabularView
    H ["<!DOCTYPE html>\n",
       {_: :html,
        c: [{_: :head,
             c: [{_: :meta, charset: 'utf-8'},
                 {_: :link, rel: :icon, href: '/.icon.png'},
                 {_: :title, c: e[:title]||re.path},
                 e[:Links].do{|links|links.map{|type,uri| {_: :link, rel: type, href: CGI.escapeHTML(uri.to_s)}}},
                 # script and stylesheet are inlined from files on every render
                 {_: :script, c: R['/js/ui.js'].readFile}, {_: :style, c: R['/css/base.css'].readFile}]},
            {_: :body,
             c: [upPage, prevPage, nextPage, # page links
                 template[graph,re],
                 # empty graph: random background colour and a big 404
                 ([{_: :style, c: "body {text-align:center;background-color:##{'%06x' % (rand 16777216)}}"},
                   {_: :span,style: 'font-size:12em;font-weight:bold',c: 404}] if graph.empty?),
                 # prev/next are repeated below the content once there are more than 8 subjects
                 ([prevPage,nextPage] if graph.keys.size > 8), downPage]}]}]}

  # predicates never given a table column of their own by TabularView (TableRow renders several of them inside the 'uri' cell)
  InlineMeta = [Title, Image, Content, Label, DC+'hasFormat', DC+'link', SIOC+'attachment', SIOC+'user_agent', Stat+'contains']
  # predicates given a column by TabularView only when the 'full' query arg is present
  VerboseMeta = [DC+'identifier', DC+'source', DCe+'rights', DCe+'publisher', RSS+'comments', RSS+'em', RSS+'category', Atom+'edit', Atom+'self', Atom+'replies', Atom+'alternate',SIOC+'has_discussion', SIOC+'reply_of', SIOC+'num_replies', Mtime, Podcast+'explicit', Podcast+'summary', "http://wellformedweb.org/CommentAPI/commentRss","http://rssnamespace.org/feedburner/ext/1.0#origLink","http://purl.org/syndication/thread/1.0#total","http://search.yahoo.com/mrss/content",Harvard+'featured']
  # Table view of a tree-graph: one row per subject, one column per predicate.
  # g - tree-graph Hash
  # e - requested resource; query args 'sort', 'ascending' and 'full' are read,
  #     and e.env[:label], e.env[:links], e.env[:images] are reset for the rows
  # Returns an Array of H-renderable structures: the table plus style elements.
  # NOTE(review): the 'sort' query arg is interpolated unescaped into the final
  # style element below - confirm whether it needs sanitizing.
  TabularView = -> g, e {
    e.env[:label] = {}; (1..10).map{|i|e.env[:label]["quote"+i.to_s] = true} # colorize up to 10-levels of quoting
    [:links,:images].map{|p| e.env[p] = []} # link and image lists for deduplication
    p = e.q['sort'] || Date # sort predicate
    direction = e.q.has_key?('ascending') ? :id : :reverse # :id (Object#id) leaves the sorted order as is
    datatype = [R::Size,R::Stat+'mtime'].member?(p) ? :to_i : :to_s # compare sizes/mtimes numerically, the rest as strings
    # columns: 'uri' and Type first, then every key found in the graph
    keys = ['uri', Type, g.values.select{|v|v.respond_to? :keys}.map(&:keys)].flatten.uniq
    keys -= InlineMeta; keys -= VerboseMeta unless e.q.has_key? 'full'
    [{_: :table,
      c: [{_: :tbody,
           c: g.values.sort_by{|s|
             ((if p == 'uri'
               s[Title] || s[Label] || s.uri
              else
                s[p]
               end).justArray[0]||0).send datatype}.send(direction).map{|r| # sort rows
             TableRow[r,e,p,direction,keys]}.intersperse("\n")},          # render row
          {_: :tr, c: keys.map{|k| # header row
             q = e.q.merge({'sort' => k})
             if direction == :id # direction toggle
               q.delete 'ascending'
             else
               q['ascending'] = ''
             end
             href = CGI.escapeHTML R.qs q
             {_: :th,href: href,property: k,class: k==p ? 'selected' : '',c: {_: :a,href: href,class: Icons[k]||'',c: Icons[k] ? '' : (k.R.fragment||k.R.basename)}}}}]},
     # random colours for the focused row and for each label name collected while rendering rows
     {_: :style, c: ".focus, .focus a {background-color:##{'%06x' % (rand 16777216)};color:#fff;font-size:1.2em}\n"},
     {_: :style, c: e.env[:label].map{|name,_| "[name=\"#{name}\"] {color:#000;background-color: #{'#%06x' % (rand 16777216)}}\n"}},
     # underline the cells of the sort column
     {_: :style, c: "[property=\"#{p}\"] {border-color:#999;border-style: solid; border-width: 0 0 .1em 0}"}]}

  # Photo gallery view (PhotoSwipe).
  # graph - tree-graph Hash whose keys are image URIs; each value is read for
  #         Stat+'width' and Stat+'height'
  # e     - requested resource; the 'start' query arg selects the initial slide
  # Returns an Array of H-renderable structures: inlined CSS/JS, one thumbnail
  # link per image, the PhotoSwipe root markup, and the init script.
  # The 'start' arg is coerced to an Integer before it is placed in the inline
  # script, so a crafted query string cannot inject script text.
  Gallery = -> graph,e,_=true {
    [{_: :style, c: R['/css/photo.css'].readFile},
     {_: :style, c: R['/css/misc/default-skin.css'].readFile},
     {_: :script, c: R['/js/photoswipe.min.js'].readFile},
     {_: :script, c: R['/js/photoswipe-ui.min.js'].readFile},
     # thumbnail links; each gets a random id
     graph.keys.map{|i|
       {_: :a, class: :thumb, id: 'a'+rand.to_s.sha2[0..7], href: i.R.basename, c: {_: :img, src: i.R.thumb.basename}}},
     %q{<!-- Root element of PhotoSwipe. Must have class pswp. --> <div class="pswp" tabindex="-1" role="dialog" aria-hidden="true"> <!-- Background of PhotoSwipe.          It's a separate element as animating opacity is faster than rgba(). --> <div class="pswp__bg"></div> <!-- Slides wrapper with overflow:hidden. --> <div class="pswp__scroll-wrap"> <!-- Container that holds slides.             PhotoSwipe keeps only 3 of them in the DOM to save memory.             Don't modify these 3 pswp__item elements, data is added later on. --> <div class="pswp__container"> <div class="pswp__item"></div> <div class="pswp__item"></div> <div class="pswp__item"></div> </div> <!-- Default (PhotoSwipeUI_Default) interface on top of sliding area. Can be changed. --> <div class="pswp__ui pswp__ui--hidden"> <div class="pswp__top-bar"> <!--  Controls are self-explanatory. Order can be changed. --> <div class="pswp__counter"></div> <button class="pswp__button pswp__button--close" title="Close (Esc)"></button> <button class="pswp__button pswp__button--share" title="Share"></button> <button class="pswp__button pswp__button--fs" title="Toggle fullscreen"></button> <button class="pswp__button pswp__button--zoom" title="Zoom in/out"></button> <!-- Preloader demo http://codepen.io/dimsemenov/pen/yyBWoR --> <!-- element will get class pswp__preloader--active when preloader is running --> <div class="pswp__preloader"> <div class="pswp__preloader__icn"> <div class="pswp__preloader__cut"> <div class="pswp__preloader__donut"></div> </div> </div> </div> </div> <div class="pswp__share-modal pswp__share-modal--hidden pswp__single-tap"> <div class="pswp__share-tooltip"></div> </div> <button class="pswp__button pswp__button--arrow--left" title="Previous (arrow left)"> </button> <button class="pswp__button pswp__button--arrow--right" title="Next (arrow right)"> </button> <div class="pswp__caption"> <div class="pswp__caption__center"></div> </div> </div> </div> </div>},
     # slide list as JSON, then gallery start-up
     {_: :script, c: "
      var items = #{graph.keys.map{|k|{src: k.R.basename, msrc: k.R.thumb.basename, w: graph[k][Stat+'width'][0], h: graph[k][Stat+'height'][0]}}.to_json};
      var gallery = new PhotoSwipe( document.querySelectorAll('.pswp')[0], PhotoSwipeUI_Default, items, {index: #{(e.q['start']||0).to_i}});
      gallery.init();
"}]}

  # Render one resource as a table row.
  # l         - resource Hash from the tree-graph
  # e         - requested resource (query args and env are read; env[:label],
  #             env[:links] and env[:images] are appended to)
  # sort      - sort predicate (not referenced in the body)
  # direction - sort direction (not referenced in the body)
  # keys      - column predicates, one cell is produced per key
  # Returns an H-renderable :tr Hash, or nil in 'head' mode when the resource
  # has no name.
  TableRow = -> l,e,sort,direction,keys {
    # id
    this = l.R
    href = this.uri
    head = e.q.has_key? 'head'
    # row id: the fragment when the row is a fragment of the requested document, else a hash of the URI
    rowID = if e.path == this.path && this.fragment
              this.fragment
            else
              'r' + href.sha2
            end
    # the row describing the requested resource itself
    focus = !this.fragment && this.path==e.path

    # type
    types = l.types
    isChat = types.member? SIOC+'InstantMessage'
    isImg = types.member? Image
    isMail = types.member? SIOC+'MailMessage'
    isBlog = types.member? SIOC+'BlogPost'
    isTweet = types.member? SIOC+'Tweet'
    monospace = isChat || isMail

    # date: latest Date value; its first 14 characters with - T : turned into / form a container path
    date = l[Date].justArray.sort[-1]
    datePath = '/' + date[0..13].gsub(/[-T:]/,'/') if date

    # name. required for overview escalation
    # prefer explicit title with fs-metadata fallback
    names = []
    l[Title].do{|t|names.concat t.justArray}
    unless !names.empty? || !this.path || isTweet || monospace
      fsName = (URI.unescape (File.basename this.path))[0..64]
      names.push(focus && e.env[:title] || fsName)
    end

    # link resource-entry in index context
    indexContext = -> p,v {
      v = v.R
      if isMail # address*month
        {_: :a, href: v.path + '?head#r' + href.sha2, c: v.label}
      elsif isTweet
        if p == Creator # creator*day
          {_: :a, href: datePath[0..-4] + '*/*twitter*' + v.path[1..-1] + '*#r' + href.sha2, c: v.label}
        elsif p == To # host*hour
          {_: :a, href: datePath + '*twitter*#r' + href.sha2, c: v.label}
        end
      elsif isBlog # host*day
        {_: :a, href: datePath[0..-4] + '*/*' + v.host + '*?head#r' + href.sha2, c: v.label}
      else
        v
      end}

    # show unless nameless resource in heading-only mode
    if !head || !names.empty?
      {_: :tr, href: href, class: focus ? 'focus' : '',
       c: keys.map{|k|
         {_: :td, property: k,
          c: case k
             when 'uri'
               # names
               [names.map{|name|{_: :a, class: :title, href: href, c: (CGI.escapeHTML name.to_s)}}.intersperse(' '), ' ',
                # labels
                l[Label].justArray.map{|v|
                  label = (v.respond_to?(:uri) ? (v.R.fragment || v.R.basename) : v).to_s
                  lbl = label.downcase.gsub(/[^a-zA-Z0-9_]/,'')
                  e.env[:label][lbl] = true
                  [{_: :a, class: :label, href: href, name: lbl, c: (CGI.escapeHTML label)},' ']},
                # containment
                (l[Stat+'contains'].justArray.sort_by(&:uri).do{|cs|
                  {class: :containers, c: cs.map{|c|{_: :a, href: c.uri, c: c.label+' '}.update(focus ? {id: 'c_'+c.uri.sha2} : {})}} unless cs.empty?} unless focus),
                # links
                (links = [DC+'link',
                          SIOC+'attachment',
                          DC+'hasFormat'].map{|p|l[p]}.flatten.compact.map(&:R).select{|l|!e.env[:links].member? l} # unseen links
                 links.map{|l|e.env[:links].push l} # mark as visited
                 {_: :table, class: :links, # show
                  c: links.group_by(&:host).map{|host,links|
                    e.env[:label][host] = true
                    small = links.size < 5
                    {_: :tr,
                     c: [{_: :td, class: :host, c: ({_: :a, name: host, href: '//'+host, c: host.sub(/^www\./,'')} if host)},
                         {_: :td, class: :path, c: links.map{|link|
                            {_: :a, name: host, href: link.uri,
                                   c: CGI.escapeHTML(link.label[0..64])}.update(small ? {id: 'link_'+rand.to_s.sha2} : {})}.intersperse(' ')}]}}} unless links.empty?),
                # content is omitted in 'head' mode; chat and mail are wrapped in <pre>
                (l[Content].justArray.map{|c|monospace ? {_: :pre,c: c} : c} unless head),
                # images
                (images = [] # image list
                 images.push this if isImg       # subject of triple
                 l[Image].do{|i|images.concat i} #  object of triple
                 images.map(&:R).select{|i|!e.env[:images].member? i}.map{|img| # unseen images
                   e.env[:images].push img
                   {_: :a, class: :thumb, href: href,
                    c: {_: :img, src: if !img.host || e.host==img.host
                         img.path + '?thumb'
                       else
                         img.uri
                        end}}})]
             when Type
               l[Type].justArray.uniq.select{|t|t.respond_to? :uri}.map{|t|
                 {_: :a, href: href, c: Icons[t.uri] ? '' : (t.R.fragment||t.R.basename), class: Icons[t.uri]}}
             when Size
               # total of all size values
               l[Size].do{|sz|
                 sum = 0
                 sz.justArray.map{|v|
                   sum += v.to_i}
                 sum}
             when Creator
               [l[k].justArray.map{|v|
                 if v.respond_to? :uri
                   indexContext[k,v]
                 else
                   CGI.escapeHTML v.to_s
                 end}.intersperse(' '),
                (l[SIOC+'user_agent'].do{|ua|
                   ['<br>', {_: :span, class: :notes, c: ua.join}]} unless head)]
             when SIOC+'addressed_to'
               l[k].justArray.map{|v|
                 if v.respond_to? :uri
                   indexContext[k,v]
                 else
                   CGI.escapeHTML v.to_s
                 end}.intersperse(' ')
             when Date
               {_: :a, class: :date, href: (datePath||'') + '#r' + href.sha2, c: date}
             when DC+'cache'
               l[k].justArray.map{|c|[{_: :a, href: c.path, c: '&#9939;'}, ' ']}
             else
               # generic cell: resources as links, literals escaped
               l[k].justArray.map{|v|v.respond_to?(:uri) ? v.R : CGI.escapeHTML(v.to_s)}.intersperse(' ')
             end}}.intersperse("\n")}.update(focus ? {} : {id: rowID})
    end
  }

  # Tree-graph grep with highlighted HTML output.
  # graph - tree-graph Hash, filtered and rewritten in place
  # q     - query String, tokenized by R.tokens
  # Resources whose string form does not match any term are deleted; matching
  # content is cut down to its first six matching lines (tags stripped, 513
  # characters each) with every hit wrapped in a span of class w<term index>.
  # A '#grep.CSS' entry carrying one random colour per term is added.
  # Returns the same graph.
  Grep = -> graph, q {
    words = R.tokens q
    # term -> position, used to pick the highlight class
    wordIndex = {}
    words.each_with_index{|word, position| wordIndex[word] = position}
    pattern = /(#{words.join '|'})/i

    # drop resources without any match
    graph.delete_if{|_, resource| !resource.to_s.match(pattern)}

    # highlight matches
    graph.each_value{|resource|
      hits = resource[Content].justArray.map(&:lines).flatten.grep(pattern)
      next if hits.empty?
      resource[Content] = hits[0..5].map{|line|
        stripped = line.gsub(/<[^>]+>/,'')[0..512]
        stripped.gsub(pattern){|match|
          H({_: :span, class: "w#{wordIndex[match.downcase]}", c: match})}}}

    # add highlight CSS to graph
    rules = wordIndex.values.map{|i|
      ".w#{i} {background-color: #{'#%06x' % (rand 16777216)}; color: white}\n"}
    graph['#grep.CSS'] = {Content => H({_: :style, c: rules})}
    graph}

end
2017-09-16T14:14:11+00:00 15946
ww.rb
# coding: utf-8
%w{cgi csv date digest/sha2 dimensions fileutils json linkeddata mail nokogiri open-uri pathname rack rdf redcarpet shellwords}.map{|r|require r}
class Array
  # New Array with i placed between each pair of adjacent elements
  def intersperse i
    flat_map{|element| [element, i]}[0..-2] # drop the separator after the last element
  end

  # An Array is already a list
  def justArray
    self
  end
end
class FalseClass
  # false ends a #do chain: the block is not invoked and false is returned
  def do
    false
  end
end
class Hash
  # Resource reference built from the "uri" field
  def R
    R.new self["uri"]
  end

  # The "uri" field
  def uri
    self["uri"]
  end

  # URIs of the R::Type values that are resources (respond to #uri); literals are ignored
  def types
    typed = self[R::Type].justArray.select{|t| t.respond_to? :uri}
    typed.map &:uri
  end
end
class NilClass
  # nil ends a #do chain: the block is not invoked and nil is returned
  def do
    nil
  end

  # nil is the empty list
  def justArray
    []
  end
end
class Object
  # Identity; callable through #send where a no-op is wanted
  def id
    self
  end

  # Yield the receiver to the block and return the block's value
  def do
    yield self
  end

  # Wrap a single value in a one-element Array
  def justArray
    [self]
  end

  # Time and DateTime instances are returned as-is; anything else is handed to Time.parse
  def to_time
    return self if [Time, DateTime].member?(self.class)
    Time.parse(self)
  end
end
class RDF::URI
  # cast to R, built from the URI's string form
  def R; R.new to_s end
end
class RDF::Node
  # cast to R, built from the node's string form
  def R; R.new to_s end
end
class Pathname
  # cast a filesystem path to R: the path string is tagged as UTF-8 (String#utf8) and converted by R.fromPOSIX
  def R; R.fromPOSIX to_s.utf8 end
end
# Resource reference: an RDF::URI with filesystem and HTTP helpers.
# Paths are mapped onto the filesystem relative to the current directory (see pathPOSIX).
class R < RDF::URI
  # URI constants
  W3 = 'http://www.w3.org/'
  OA = 'https://www.w3.org/ns/oa#'
  Purl = 'http://purl.org/'
  DC   = Purl + 'dc/terms/'
  DCe  = Purl + 'dc/elements/1.1/'
  SIOC = 'http://rdfs.org/sioc/ns#'
  Schema = 'http://schema.org/'
  Podcast = 'http://www.itunes.com/dtds/podcast-1.0.dtd#'
  Harvard  = 'http://harvard.edu/'
  Sound    = Purl + 'ontology/mo/Sound'
  Image    = DC + 'Image'
  RSS      = Purl + 'rss/1.0/'
  Date     = DC   + 'date'
  Title    = DC   + 'title'
  Post     = SIOC + 'Post'
  To       = SIOC + 'addressed_to'
  From     = SIOC + 'has_creator' # same predicate as Creator
  Creator  = SIOC + 'has_creator'
  Content  = SIOC + 'content'
  Stat     = W3   + 'ns/posix/stat#'
  Atom     = W3   + '2005/Atom#'
  Type     = W3 + '1999/02/22-rdf-syntax-ns#type'
  Label    = W3 + '2000/01/rdf-schema#label'
  Size     = Stat + 'size'
  Mtime    = Stat + 'mtime'
  Container = W3  + 'ns/ldp#Container'

  # casts and constructors
  def R; self end
  def R.[] uri; R.new uri end
  # filesystem path -> R: a leading '.' is dropped, ' ' and '#' are percent-encoded
  def R.fromPOSIX path; path.sub(/^\./,'').gsub(' ','%20').gsub('#','%23').R end
  # lowercased, de-duplicated word tokens of a string
  def R.tokens str; str.scan(/[\w]+/).map(&:downcase).uniq end
  # append to the URI; the result keeps this resource's request environment
  def + u; R[uri + u.to_s].setEnv @r end
  # ordering and equality are by string form
  def <=> c; to_s <=> c.to_s end
  def ==  u; to_s == u.to_s end
  def basename; File.basename path end
  # child nodes, excluding names starting with '.'
  def children; node.children.delete_if{|f|f.basename.to_s.index('.')==0}.map{|c|c.R.setEnv @r} end
  # parent container, keeping the host when there is one
  def dir; ((host ? ('//'+host) : '') + dirname).R end
  def dirname; File.dirname path end
  # request environment, as stored by setEnv
  def env; @r end
  def exist?; node.exist? end
  # filename extension without the dot, '' when there is none
  def ext; (File.extname uri)[1..-1] || '' end
  def file?; node.file? end
  # find(1) beneath this node: case-insensitive path match on *p*, at most 1024 results
  def find p; `find #{sh} -ipath #{('*'+p+'*').sh} | head -n 1024`.lines.map{|l|R.fromPOSIX l.chomp} end
  # glob matches as R, or nil when nothing matches
  def glob; (Pathname.glob pathPOSIX).map{|p|p.R.setEnv @r}.do{|g|g.empty? ? nil : g} end
  # display label: fragment, else unescaped basename, else host, else ''
  def label; fragment || (path && basename != '/' && (URI.unescape basename)) || host || '' end
  def ln a,b; FileUtils.ln a.pathPOSIX, b.pathPOSIX end
  def ln_s a,b; FileUtils.ln_s a.pathPOSIX, b.pathPOSIX end
  def match p; to_s.match p end
  # create the directory (and parents) unless the node exists; returns self
  def mkdir; FileUtils.mkdir_p pathPOSIX unless exist?; self end
  def mtime; node.stat.mtime end
  def node; Pathname.new pathPOSIX end
  # filesystem path: URI-unescaped, absolute URI paths made relative to the current directory
  def pathPOSIX; URI.unescape(path[0]=='/' ? '.'+path : path) end
  # file content as a String; File.read closes the file, where the earlier File.open(...).read left the handle open
  def readFile; File.read pathPOSIX end
  # attach the request environment; returns self
  def setEnv r; @r = r; self end
  # shell-escaped filesystem path
  def shellPath; pathPOSIX.utf8.sh end
  # size of the node, 0 when it cannot be determined
  def size; node.size rescue 0 end
  # URI with a trailing document extension removed
  def stripDoc; R[uri.sub /\.(e|html|json|log|md|msg|ttl|txt)$/,''].setEnv(@r) end
  # thumbnail location: a dot-prefixed .jpg sibling in the same directory
  def thumb; dir + '/.' + basename + '.jpg' end
  # JSON form is a {'uri' => uri} reference
  def to_json *a; {'uri' => uri}.to_json *a end
  # write o to the file, creating the containing directory first; returns self
  def writeFile o; dir.mkdir; File.open(pathPOSIX,'w'){|f|f << o}; self end

  alias_method :e, :exist?
  alias_method :m, :mtime
  alias_method :sh, :shellPath
  alias_method :uri, :to_s

  # the rest of class R lives in these sibling files
  %w{MIME HTML HTTP}.map{|r|require_relative r}

end
2017-09-06T10:44:42+00:00 3630
install
#!/usr/bin/env ruby
# Installer: symlink ww.rb from this directory into Ruby's site library
# directory so that it can be loaded with require. Safe to run repeatedly.
require 'fileutils'
require 'pathname'
require 'rbconfig'
loc = RbConfig::CONFIG["sitelibdir"] + '/'
FileUtils.mkdir_p loc unless Pathname(loc).exist?
source = (File.expand_path File.dirname __FILE__)+'/ww.rb'
# force: replace a link left by an earlier install instead of raising Errno::EEXIST
FileUtils.ln_s source, loc, force: true
2017-08-22T22:21:39+00:00
R 2017-08-22T21:28:20+00:00 132
Gemfile.lock 2017-07-18T20:54:07+00:00 4001
Gemfile
# Gem dependencies.
# ww.rb requires dimensions, linkeddata, mail, nokogiri, rack and redcarpet.
# The remaining gems are not required by the library code itself; presumably
# they are development and server tooling - confirm before removing any.
source "https://rubygems.org/"
gem 'dimensions'
gem 'foreman'
gem 'linkeddata'
gem 'mail'
gem 'nokogiri'
gem 'nokogiri-diff'
gem 'pry'
gem 'pry-doc'
gem 'rack'
gem 'redcarpet'
gem 'thin'
gem 'unicorn'
2017-07-01T08:17:47+00:00 201
config.ru 2017-06-17T23:31:11+00:00 38