From 09b4c05ce19d5ec824b4b382b9b8eb58baba2da6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9C=A7=E5=B3=B6=E3=81=B2=E3=81=AA=E3=81=9F?=
 <naaba6z7y@gmail.com>
Date: Mon, 20 Aug 2018 23:41:39 +0900
Subject: [PATCH] SecurityUpdate

---
 Gemfile                       |   2 +
 Gemfile.lock                  |   2 +
 app/lib/formatter.rb          |  17 +-
 app/lib/formatter_markdown.rb | 340 ++++++++++++++++++++++++++++++++++
 4 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 app/lib/formatter_markdown.rb

diff --git a/Gemfile b/Gemfile
index 263be0ac3..cbf00459c 100644
--- a/Gemfile
+++ b/Gemfile
@@ -96,6 +96,8 @@ gem 'json-ld', git: 'https://github.com/ruby-rdf/json-ld.git', ref: '345b7a57333
 gem 'json-ld-preloaded', '~> 3.0'
 gem 'rdf-normalize', '~> 0.3'
 
+gem 'redcarpet', "~> 3.4.0" 
+
 group :development, :test do
   gem 'fabrication', '~> 2.20'
   gem 'fuubar', '~> 2.4'
diff --git a/Gemfile.lock b/Gemfile.lock
index f185f3fa5..eb5e5445a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -496,6 +496,7 @@ GEM
       link_header (~> 0.0, >= 0.0.8)
     rdf-normalize (0.3.3)
       rdf (>= 2.2, < 4.0)
+    redcarpet (3.4.0)
     redis (4.1.2)
     redis-actionpack (5.0.2)
       actionpack (>= 4.0, < 6)
@@ -755,6 +756,7 @@ DEPENDENCIES
   rails-i18n (~> 5.1)
   rails-settings-cached (~> 0.6)
   rdf-normalize (~> 0.3)
+  redcarpet (~> 3.4.0)
   redis (~> 4.1)
   redis-namespace (~> 1.5)
   redis-rails (~> 5.0)
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index b5f42305f..c1ad9c701 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require 'singleton'
+require_relative './formatter_markdown'
 require_relative './sanitize_config'
 
 class Formatter
@@ -35,12 +36,21 @@ class Formatter
     linkable_accounts << status.account
 
     html = raw_content
+
+    mdFormatter = Formatter_Markdown.new(html)
+    html = mdFormatter.formatted
+
     html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
     html = encode_and_link_urls(html, linkable_accounts)
     html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
     html = simple_format(html, {}, sanitize: false)
     html = html.delete("\n")
 
+    mdLinkDecoder = MDLinkDecoder.new(html)
+    html = mdLinkDecoder.decode
+
+    html.gsub!(/(&amp;)/){"&"}
+
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
 
@@ -111,13 +121,18 @@ class Formatter
   def encode_and_link_urls(html, accounts = nil, options = {})
     entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
 
+    mdExtractor = MDExtractor.new(html)
+    entities.concat(mdExtractor.extractEntities)
+
     if accounts.is_a?(Hash)
       options  = accounts
       accounts = nil
     end
 
     rewrite(html.dup, entities) do |entity|
-      if entity[:url]
+      if entity[:markdown]
+        html[entity[:indices][0]...entity[:indices][1]]
+      elsif entity[:url]
         link_to_url(entity, options)
       elsif entity[:hashtag]
         link_to_hashtag(entity)
diff --git a/app/lib/formatter_markdown.rb b/app/lib/formatter_markdown.rb
new file mode 100644
index 000000000..30a08d558
--- /dev/null
+++ b/app/lib/formatter_markdown.rb
@@ -0,0 +1,340 @@
+require 'uri'
+require 'redcarpet'
+require 'redcarpet/render_strip'
+
+class Formatter_Markdown
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def formatted
+        mdRenderer = CustomMDRenderer.new(
+            strikethrough: true,
+            hard_wrap: true,
+            autolink: false,
+            superscript:false,
+            fenced_link: true,
+            fenced_image: true,
+            no_intra_emphasis: true,
+            no_links: true,
+            no_styles: true,
+            no_images: true,
+            filter_html: true,
+            escape_html: true,
+            safe_links_only: true,
+            with_toc_data: true,
+            xhtml: false,
+            prettify: true,
+            link_attributes: true
+        )
+
+        md = Redcarpet::Markdown.new(
+            mdRenderer,
+            strikethrough: true,
+            hard_wrap: true,
+            superscript:false,
+            autolink: false,
+            space_after_headers: true,
+            no_intra_emphasis: true,
+            no_links: true,
+            no_styles: true,
+            no_images: true,
+            filter_html: true,
+            escape_html: true,
+            safe_links_only: true,
+            with_toc_data: true,
+            xhtml: false,
+            prettify: true,
+            link_attributes: true
+        )
+        s = @html
+        s.gsub!(/\n[\n]+/) {"\n　\n"}# 改行周りの問題を修正
+        s.gsub!(/`[ ]+`/) {"｀ ｀"}# code内が半角スペースのみだとHTMLが壊れるのでそれの回避
+
+        renderedMD = md.render(s)
+
+        result = renderedMD
+        result.gsub!(/(<\w+)([^>]*>)/) { "#{$1} data-md='true' #{$2}" }# ToDo data-md="true" を認識して他鯖の人にmarkdownの使用を伝える機能の実装
+        result.gsub!(/(https?:\/\/[^<>"\[\] 　]+)/){"#{$1} "}#URLの後ろにスペースをねじ込む奴 mastodonのURL認識がゆるいのをmarkdownで対処
+
+        result
+
+    end
+
+    class CustomMDRenderer < Redcarpet::Render::HTML
+
+        #基本的な実装の流れ
+        #URLの削除(mastodonの機能上URLとして認識されると十中八九HTMLが壊れるので)
+        #markdownコンテンツ内でのmarkdownコンテンツの禁止(意図しないHTMLタグの生成によってHTMLの不正出力を防ぐ目的)
+        #最後にHTMLに出力される際にHTML的にヤバイ子たちのエスケープ
+
+        def paragraph(text)
+            %(#{text.strip})
+        end
+
+        def linebreak()
+            %(<br>)
+        end
+
+        def block_quote(quote)
+            urlRemoved = "#{remove_url(quote)}"
+            escapedContents = "#{blockquote_markdown_escape(urlRemoved)}"
+            %(<blockquote>#{escapedContents.strip}</blockquote>)
+        end
+
+        def header(text, header_level)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<h#{header_level}>#{encode(mdContentsRemoved)}</h#{header_level}>\n)
+        end
+
+        def codespan(code)
+            urlRemoved = "#{remove_url(code)}"
+            escapedCode = "#{escape_bbcode(urlRemoved)}"
+            %(<code>#{encode(escapedCode)}</code>)
+        end
+
+        def list(contents, list_type)
+            if list_type == :unordered
+                %(<ul>#{contents.strip}</ul>)
+            elsif list_type == :ordered
+                %(<ol>#{contents.strip}</ol>)
+            else
+                %(<#{list_type}>#{contents.strip}</#{list_type}>)
+            end
+        end
+
+        def list_item(text, list_type)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<li>#{encode(mdContentsRemoved)}</li>)
+        end
+
+        def emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sup>#{encode(mdContentsRemoved)}</sup>)
+        end
+
+        def double_emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sub>#{encode(mdContentsRemoved)}</sub>)
+        end
+
+        def triple_emphasis(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<small>#{encode(mdContentsRemoved)}</small>)
+        end
+
+        def strikethrough(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<s>#{encode(mdContentsRemoved)}</s>)
+        end
+
+        def superscript(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<sup>#{encode(mdContentsRemoved)}</sup>)
+        end
+
+        def underline(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<u>#{encode(mdContentsRemoved)}</u>)
+        end
+
+        def highlight(text)
+            urlRemoved = "#{remove_url(text)}"
+            mdContentsRemoved = "#{markdown_escape(urlRemoved)}"
+            %(<mark>#{encode(mdContentsRemoved)}</mark>)
+        end
+
+        #オートリンクはmastodonとの相性が悪いので基本的には使わない
+
+        def autolink(link, link_type)
+            %(<a herf="#{link}">リンク</a>)
+        end
+
+        #https以外の物がURLとして記入された時にTextをHTML的に考えて安全に表示するように変更
+
+        def image(link, title, alt_text)
+
+            if alt_text =~ /[<>"\[\] 　]+/
+                alt_text = "設定なし"
+            end
+
+            imgcheck = "#{link}"
+            if imgcheck !~ /\Ahttps:\/\/[^<>"\[\] 　]+\z/
+                %(#{encode(alt_text)})
+            else
+                %(<img src="#{URI.encode_www_form_component(link)}">)
+            end
+        end
+
+        def link(link, title, content)
+
+            if content =~ /([<>"\[\] 　]+|https?:\/\/|#|@)/
+                content = "リンク"
+            elsif content !~ /.+/
+                content = "リンク"
+            end
+
+            linkcheck = "#{link}"
+            if linkcheck !~ /\Ahttps:\/\/[^<>"\[\] 　]+\z/
+                %(#{encode(content)})
+            else
+                %(<a href="#{URI.encode_www_form_component(link)}">#{encode(content)}</a>)
+            end
+        end
+
+        #ここから下はいろいろエスケープするための奴
+
+        #HTML的に考えてよろしくない子たちをエスケープする奴
+        def encode(html)
+            HTMLEntities.new.encode(html)
+        end
+
+        #markdownコンテンツないでURLが生成されるのを防ぐためのエスケープする奴
+        def remove_url(string)
+            url = string.gsub(/https?:\/\//){ "URL:" }
+            reply = url.gsub(/@/){ "＠" }
+            hashTag = reply.gsub(/#/){ "＃" }
+        end
+
+        #前々から要望があったcode内でBBCodeを無効化するための奴
+        def escape_bbcode(string)
+            string.gsub(/\[/){ "［" }
+        end
+
+        #markdownの中でmarkdownを展開させないためのエスケープする奴
+
+        #blockquote以外は下のが使える
+        def markdown_escape(string)
+            string.gsub(/<[^>]+>/) { "" }
+        end
+
+        #blockquoteコンテンツ内でblockquoteタグだけを許可するためのエスケープ
+        def blockquote_markdown_escape(string)
+            string.gsub(/<([\/]?a[^>]*|[\/]?img[^>]*|[\/]?code[^>]*|[\/]?h[1-6][^>]*|[\/]?sup[^>]*|[\/]?sub[^>]*|[\/]?small[^>]*|[\/]?ul[^>]*|[\/]?ol[^>]*|[\/]?li[^>]*|[\/]?hr[^>]*|[\/]?s[^>]*|[\/]?u[^>]*|[\/]?mark[^>]*)>/) { "" }
+        end
+
+        #テストで書きなぐった奴
+        def html_escape(string)
+            string.gsub(/['&\"<>\/]/, {
+              '&' => '&amp;',
+              '<' => '&lt;',
+              '>' => '&gt;',
+              '"' => '&quot;',
+              "'" => '&#x27;',
+              "/" => '&#x2F;',
+            })
+        end
+
+    end
+
+end
+
+#URLとかいう人類には早すぎたやばい子たちを大人しくするために必要な機構
+
+class MDLinkDecoder
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def decode
+        imageDecoded = @html.gsub(/<img data-md='true'\s+src="([^"]+)"([^>]*)>/) { "<a href=\"" + URI.decode_www_form_component($1) + "\"" + $2 + "><img data-md='true' src=\"" + URI.decode_www_form_component($1) + "\"" + $2 + "></a>" }
+
+        imageDecoded.gsub(/<a data-md='true'\s+href="([^"]+)"([^>]*)>/) { "<a data-md='true' href=\"" + URI.decode_www_form_component($1) + "\"" + $2 + ">" }
+    end
+end
+
+#エスケープを回避するHTMLタグの設定とかその他
+
+class MDExtractor
+    def initialize(html)
+        @html = html.dup
+    end
+
+    def extractEntities
+        [
+            extractByHTMLTagName("h1"),
+            extractByHTMLTagName("h2"),
+            extractByHTMLTagName("h3"),
+            extractByHTMLTagName("h4"),
+            extractByHTMLTagName("h5"),
+            extractByHTMLTagName("h6"),
+            extractByHTMLTagName("em"),
+            extractByHTMLTagName("sup"),
+            extractByHTMLTagName("sub"),
+            extractByHTMLTagName("small"),
+            extractByHTMLTagName("u"),
+            extractByHTMLTagName("strong"),
+            extractByHTMLTagName("ul", false, false, "li"),
+            extractByHTMLTagName("ol", false, false, "li"),
+            extractByHTMLTagName("code"),
+            extractByHTMLTagName("blockquote", false),
+            extractByHTMLTagName("hr", false, true),
+            extractByHTMLTagName("br", false, true),
+            extractByHTMLTagName("a"),
+            extractByHTMLTagName("img", false, true),
+            extractByHTMLTagName("s")
+        ].flatten.compact
+    end
+
+    def extractByHTMLTagName(tagName, isNoNest = true, isSingle = false, itemTagName = nil)
+        entities = []
+
+        @html.to_s.scan(htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)) do
+            match = $~
+
+            beginPos = match.char_begin(0)
+            endPos = match.char_end(0)
+            #puts "MDExtractor extracted with:\n" + @html + "\nbeginPos: " + beginPos.to_s + ", endPos: " + endPos.to_s + ", length: " + @html.length.to_s
+
+            entity = {
+                :markdown => true,
+                :indices => [beginPos, endPos]
+            }
+
+            entities.push(entity)
+        end
+
+        entities
+    end
+
+    def htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)
+        if isSingle
+            htmlTagPatternSingle(tagName)
+        elsif isNoNest
+            htmlTagPatternNoNest(tagName)
+        elsif itemTagName && itemTagName.length > 0
+            htmlTagPatternOuterMostWithItem(tagName, itemTagName)
+        else
+            htmlTagPatternOuterMost(tagName)
+        end
+    end
+
+    def htmlTagPattern(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<]|<#{tagName} data-md=[^>]*>|<\\/#{tagName}>)*(?:<\\/#{tagName}>)*")
+    end
+
+    def htmlTagPatternNoNest(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:.|\n)*?<\\/#{tagName}>")
+    end
+
+    def htmlTagPatternSingle(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>")
+    end
+
+    # https://stackoverflow.com/questions/546433/regular-expression-to-match-outer-brackets
+    def htmlTagPatternOuterMost(tagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|(\\g<0>))*<\/#{tagName}>")
+    end
+
+    def htmlTagPatternOuterMostWithItem(tagName, itemTagName)
+        Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|<#{itemTagName} data-md=[^>]*>|<\\/#{itemTagName}>|(\\g<0>))*<\/#{tagName}>")
+    end
+end
\ No newline at end of file