SecurityUpdate

2018-08-20 23:41:39 +09:00 · 2018-08-20 23:41:39 +09:00 · 09b4c05ce1
commit 09b4c05ce1
parent 4a2269092a
4 changed files with 360 additions and 1 deletions
--- a/2
+++ b/2
@ -96,6 +96,8 @@ gem 'json-ld', git: 'https://github.com/ruby-rdf/json-ld.git', ref: '345b7a57333
 gem 'json-ld-preloaded', '~> 3.0'
 gem 'rdf-normalize', '~> 0.3'

+gem 'redcarpet', "~> 3.4.0" 
+
 group :development, :test do
  gem 'fabrication', '~> 2.20'
  gem 'fuubar', '~> 2.4'
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -496,6 +496,7 @@ GEM
      link_header (~> 0.0, >= 0.0.8)
    rdf-normalize (0.3.3)
      rdf (>= 2.2, < 4.0)
+    redcarpet (3.4.0)
    redis (4.1.2)
    redis-actionpack (5.0.2)
      actionpack (>= 4.0, < 6)
@ -755,6 +756,7 @@ DEPENDENCIES
  rails-i18n (~> 5.1)
  rails-settings-cached (~> 0.6)
  rdf-normalize (~> 0.3)
+  redcarpet (~> 3.4.0)
  redis (~> 4.1)
  redis-namespace (~> 1.5)
  redis-rails (~> 5.0)
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require 'singleton'
+require_relative './formatter_markdown'
 require_relative './sanitize_config'

 class Formatter
@ -35,12 +36,21 @@ class Formatter
    linkable_accounts << status.account

    html = raw_content
+
+    mdFormatter = Formatter_Markdown.new(html)
+    html = mdFormatter.formatted
+
    html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
    html = encode_and_link_urls(html, linkable_accounts)
    html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
    html = simple_format(html, {}, sanitize: false)
    html = html.delete("\n")

+    mdLinkDecoder = MDLinkDecoder.new(html)
+    html = mdLinkDecoder.decode
+
+    html.gsub!(/(&amp;)/){"&"}
+
    html.html_safe # rubocop:disable Rails/OutputSafety
  end

@ -111,13 +121,18 @@ class Formatter
  def encode_and_link_urls(html, accounts = nil, options = {})
    entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)

+    mdExtractor = MDExtractor.new(html)
+    entities.concat(mdExtractor.extractEntities)
+
    if accounts.is_a?(Hash)
      options  = accounts
      accounts = nil
    end

    rewrite(html.dup, entities) do |entity|
-      if entity[:url]
+      if entity[:markdown]
+        html[entity[:indices][0]...entity[:indices][1]]
+      elsif entity[:url]
        link_to_url(entity, options)
      elsif entity[:hashtag]
        link_to_hashtag(entity)
--- a/app/lib/formatter_markdown.rb
+++ b/app/lib/formatter_markdown.rb
@ -0,0 +1,340 @@
+require 'uri'
+require 'redcarpet'
+require 'redcarpet/render_strip'
+
+class Formatter_Markdown
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def formatted
+        mdRenderer = CustomMDRenderer.new(
+            strikethrough: true,
+            hard_wrap: true,
+            autolink: false,
+            superscript:false,
+            fenced_link: true,
+            fenced_image: true,
+            no_intra_emphasis: true,
+            no_links: true,
+            no_styles: true,
+            no_images: true,
+            filter_html: true,
+            escape_html: true,
+            safe_links_only: true,
+            with_toc_data: true,
+            xhtml: false,
+            prettify: true,
+            link_attributes: true
+        )
+
+        md = Redcarpet::Markdown.new(
+            mdRenderer,
+            strikethrough: true,
+            hard_wrap: true,
+            superscript:false,
+            autolink: false,
+            space_after_headers: true,
+            no_intra_emphasis: true,
+            no_links: true,
+            no_styles: true,
+            no_images: true,
+            filter_html: true,
+            escape_html: true,
+            safe_links_only: true,
+            with_toc_data: true,
+            xhtml: false,
+            prettify: true,
+            link_attributes: true
+        )
+        s = @html
+        s.gsub!(/\n[\n]+/) {"\n　\n"}# 改行周りの問題を修正
+        s.gsub!(/`[ ]+`/) {"｀ ｀"}# code内が半角スペースのみだとHTMLが壊れるのでそれの回避
+
+        renderedMD = md.render(s)
+
+        result = renderedMD
+        result.gsub!(/(<\w+)([^>]*>)/) { "#{$1} data-md='true' #{$2}" }# ToDo data-md="true" を認識して他鯖の人にmarkdownの使用を伝える機能の実装
+        result.gsub!(/(https?:\/\/[^<>"\[\] 　]+)/){"#{$1} "}#URLの後ろにスペースをねじ込む奴 mastodonのURL認識がゆるいのをmarkdownで対処
+
+        result
+
+    end
+
+    class CustomMDRenderer < Redcarpet::Render::HTML
+
+        #基本的な実装の流れ
+        #URLの削除(mastodonの機能上URLとして認識されると十中八九HTMLが壊れるので)
+        #markdownコンテンツ内でのmarkdownコンテンツの禁止(意図しないHTMLタグの生成によってHTMLの不正出力を防ぐ目的)
+        #最後にHTMLに出力される際にHTML的にヤバイ子たちのエスケープ
+
+        def paragraph(text)
+            %(#{text.strip})
+        end
+
+        def linebreak()
+            %(<br>)
+        end
+
+        def block_quote(quote)
+            urlRemoved = "#{remove_url(quote)}"
+            escapedContents = "#{blockquote_markdown_escape(urlRemoved)}"
+            %(<blockquote>#{escapedContents.strip}</blockquote>)
+        end
+
+        def header(text, header_level)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<h#{header_level}>#{encode(mdContentsRemoved)}</h#{header_level}>\n)
+        end
+
+        def codespan(code)
+            urlRemoved = "#{remove_url(code)}"
+            escapedCode = "#{escape_bbcode(urlRemoved)}"
+            %(<code>#{encode(escapedCode)}</code>)
+        end
+
+        def list(contents, list_type)
+            if list_type == :unordered
+                %(<ul>#{contents.strip}</ul>)
+            elsif list_type == :ordered
+                %(<ol>#{contents.strip}</ol>)
+            else
+                %(<#{list_type}>#{contents.strip}</#{list_type}>)
+            end
+        end
+
+        def list_item(text, list_type)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<li>#{encode(mdContentsRemoved)}</li>)
+        end
+
+        def emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sup>#{encode(mdContentsRemoved)}</sup>)
+        end
+
+        def double_emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sub>#{encode(mdContentsRemoved)}</sub>)
+        end
+
+        def triple_emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<small>#{encode(mdContentsRemoved)}</small>)
+        end
+
+        def strikethrough(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<s>#{encode(mdContentsRemoved)}</s>)
+        end
+
+        def superscript(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sup>#{encode(mdContentsRemoved)}</sup>)
+        end
+
+        def underline(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<u>#{encode(mdContentsRemoved)}</u>)
+        end
+
+        def highlight(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<mark>#{encode(mdContentsRemoved)}</mark>)
+        end
+
+        #オートリンクはmastodonとの相性が悪いので基本的には使わない
+
+        def autolink(link, link_type)
+            %(<a herf="#{link}">リンク</a>)
+        end
+
+        #https以外の物がURLとして記入された時にTextをHTML的に考えて安全に表示するように変更
+
+        def image(link, title, alt_text)
+
+            if alt_text =~ /[<>"\[\] 　]+/
+                alt_text = "設定なし"
+            end
+
+            imgcheck = "#{link}"
+            if imgcheck !~ /\Ahttps:\/\/[^<>"\[\] 　]+\z/
+                %(#{encode(alt_text)})
+            else
+                %(<img src="#{URI.encode_www_form_component(link)}">)
+            end
+        end
+
+        def link(link, title, content)
+
+            if content =~ /([<>"\[\] 　]+|https?:\/\/|#|@)/
+                content = "リンク"
+            elsif content !~ /.+/
+                content = "リンク"
+            end
+
+            linkcheck = "#{link}"
+            if linkcheck !~ /\Ahttps:\/\/[^<>"\[\] 　]+\z/
+                %(#{encode(content)})
+            else
+                %(<a href="#{URI.encode_www_form_component(link)}">#{encode(content)}</a>)
+            end
+        end
+
+        #ここから下はいろいろエスケープするための奴
+
+        #HTML的に考えてよろしくない子たちをエスケープする奴
+        def encode(html)
+            HTMLEntities.new.encode(html)
+        end
+
+        #markdownコンテンツないでURLが生成されるのを防ぐためのエスケープする奴
+        def remove_url(string)
+            url = string.gsub(/https?:\/\//){ "URL:" }
+            reply = url.gsub(/@/){ "＠" }
+            hashTag = reply.gsub(/#/){ "＃" }
+        end
+
+        #前々から要望があったcode内でBBCodeを無効化するための奴
+        def escape_bbcode(string)
+            string.gsub(/\[/){ "［" }
+        end
+
+        #markdownの中でmarkdownを展開させないためのエスケープする奴
+
+        #blockquote以外は下のが使える
+        def markdown_escape(string)
+            string.gsub(/<[^>]+>/) { "" }
+        end
+
+        #blockquoteコンテンツ内でblockquoteタグだけを許可するためのエスケープ
+        def blockquote_markdown_escape(string)
+            string.gsub(/<([\/]?a[^>]*|[\/]?img[^>]*|[\/]?code[^>]*|[\/]?h[1-6][^>]*|[\/]?sup[^>]*|[\/]?sub[^>]*|[\/]?small[^>]*|[\/]?ul[^>]*|[\/]?ol[^>]*|[\/]?li[^>]*|[\/]?hr[^>]*|[\/]?s[^>]*|[\/]?u[^>]*|[\/]?mark[^>]*)>/) { "" }
+        end
+
+        #テストで書きなぐった奴
+        def html_escape(string)
+            string.gsub(/['&\"<>\/]/, {
+              '&' => '&amp;',
+              '<' => '&lt;',
+              '>' => '&gt;',
+              '"' => '&quot;',
+              "'" => '&#x27;',
+              "/" => '&#x2F;',
+            })
+        end
+
+    end
+
+end
+
+#URLとかいう人類には早すぎたやばい子たちを大人しくするために必要な機構
+
+class MDLinkDecoder
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def decode
+        imageDecoded = @html.gsub(/<img data-md='true'\s+src="([^"]+)"([^>]*)>/) { "<a href=\"" + URI.decode_www_form_component($1) + "\"" + $2 + "><img data-md='true' src=\"" + URI.decode_www_form_component($1) + "\"" + $2 + "></a>" }
+
+        imageDecoded.gsub(/<a data-md='true'\s+href="([^"]+)"([^>]*)>/) { "<a data-md='true' href=\"" + URI.decode_www_form_component($1) + "\"" + $2 + ">" }
+    end
+end
+
+#エスケープを回避するHTMLタグの設定とかその他
+
+class MDExtractor
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def extractEntities
+        [
+            extractByHTMLTagName("h1"),
+            extractByHTMLTagName("h2"),
+            extractByHTMLTagName("h3"),
+            extractByHTMLTagName("h4"),
+            extractByHTMLTagName("h5"),
+            extractByHTMLTagName("h6"),
+            extractByHTMLTagName("em"),
+            extractByHTMLTagName("sup"),
+            extractByHTMLTagName("sub"),
+            extractByHTMLTagName("small"),
+            extractByHTMLTagName("u"),
+            extractByHTMLTagName("strong"),
+            extractByHTMLTagName("ul", false, false, "li"),
+            extractByHTMLTagName("ol", false, false, "li"),
+            extractByHTMLTagName("code"),
+            extractByHTMLTagName("blockquote", false),
+            extractByHTMLTagName("hr", false, true),
+            extractByHTMLTagName("br", false, true),
+            extractByHTMLTagName("a"),
+            extractByHTMLTagName("img", false, true),
+            extractByHTMLTagName("s")
+        ].flatten.compact
+    end
+
+    def extractByHTMLTagName(tagName, isNoNest = true, isSingle = false, itemTagName = nil)
+        entities = []
+
+        @html.to_s.scan(htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)) do
+            match = $~
+
+            beginPos = match.char_begin(0)
+            endPos = match.char_end(0)
+            #puts "MDExtractor extracted with:\n" + @html + "\nbeginPos: " + beginPos.to_s + ", endPos: " + endPos.to_s + ", length: " + @html.length.to_s
+
+            entity = {
+                :markdown => true,
+                :indices => [beginPos, endPos]
+            }
+
+            entities.push(entity)
+        end
+
+        entities
+    end
+
+    def htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)
+        if isSingle
+            htmlTagPatternSingle(tagName)
+        elsif isNoNest
+            htmlTagPatternNoNest(tagName)
+        elsif itemTagName && itemTagName.length > 0
+            htmlTagPatternOuterMostWithItem(tagName, itemTagName)
+        else
+            htmlTagPatternOuterMost(tagName)
+        end
+    end
+
+    def htmlTagPattern(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<]|<#{tagName} data-md=[^>]*>|<\\/#{tagName}>)*(?:<\\/#{tagName}>)*")
+    end
+
+    def htmlTagPatternNoNest(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:.|\n)*?<\\/#{tagName}>")
+    end
+
+    def htmlTagPatternSingle(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>")
+    end
+
+    # https://stackoverflow.com/questions/546433/regular-expression-to-match-outer-brackets
+    def htmlTagPatternOuterMost(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|(\\g<0>))*<\/#{tagName}>")
+    end
+
+    def htmlTagPatternOuterMostWithItem(tagName, itemTagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|<#{itemTagName} data-md=[^>]*>|<\\/#{itemTagName}>|(\\g<0>))*<\/#{tagName}>")
+    end
+end