SecurityUpdate

This commit is contained in:
霧島ひなた 2018-08-20 23:41:39 +09:00 committed by yoheizuho
parent 4a2269092a
commit 09b4c05ce1
4 changed files with 360 additions and 1 deletions

View File

@ -96,6 +96,8 @@ gem 'json-ld', git: 'https://github.com/ruby-rdf/json-ld.git', ref: '345b7a57333
gem 'json-ld-preloaded', '~> 3.0'
gem 'rdf-normalize', '~> 0.3'
gem 'redcarpet', "~> 3.4.0"
group :development, :test do
gem 'fabrication', '~> 2.20'
gem 'fuubar', '~> 2.4'

View File

@ -496,6 +496,7 @@ GEM
link_header (~> 0.0, >= 0.0.8)
rdf-normalize (0.3.3)
rdf (>= 2.2, < 4.0)
redcarpet (3.4.0)
redis (4.1.2)
redis-actionpack (5.0.2)
actionpack (>= 4.0, < 6)
@ -755,6 +756,7 @@ DEPENDENCIES
rails-i18n (~> 5.1)
rails-settings-cached (~> 0.6)
rdf-normalize (~> 0.3)
redcarpet (~> 3.4.0)
redis (~> 4.1)
redis-namespace (~> 1.5)
redis-rails (~> 5.0)

View File

@ -1,6 +1,7 @@
# frozen_string_literal: true
require 'singleton'
require_relative './formatter_markdown'
require_relative './sanitize_config'
class Formatter
@ -35,12 +36,21 @@ class Formatter
linkable_accounts << status.account
html = raw_content
mdFormatter = Formatter_Markdown.new(html)
html = mdFormatter.formatted
html = "RT @#{prepend_reblog} #{html}" if prepend_reblog
html = encode_and_link_urls(html, linkable_accounts)
html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
html = simple_format(html, {}, sanitize: false)
html = html.delete("\n")
mdLinkDecoder = MDLinkDecoder.new(html)
html = mdLinkDecoder.decode
html.gsub!(/(&amp;)/){"&"}
html.html_safe # rubocop:disable Rails/OutputSafety
end
@ -111,13 +121,18 @@ class Formatter
def encode_and_link_urls(html, accounts = nil, options = {})
entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
mdExtractor = MDExtractor.new(html)
entities.concat(mdExtractor.extractEntities)
if accounts.is_a?(Hash)
options = accounts
accounts = nil
end
rewrite(html.dup, entities) do |entity|
if entity[:url]
if entity[:markdown]
html[entity[:indices][0]...entity[:indices][1]]
elsif entity[:url]
link_to_url(entity, options)
elsif entity[:hashtag]
link_to_hashtag(entity)

View File

@ -0,0 +1,340 @@
require 'uri'
require 'redcarpet'
require 'redcarpet/render_strip'
class Formatter_Markdown
# Keeps a private copy of the text that will be rendered.
#
# @param html [String] raw text; it is duplicated, so the in-place
#   substitutions performed by #formatted never touch the caller's object
def initialize(html)
  source_copy = html.dup
  @html = source_copy
end
# Renders the stored text with Redcarpet and post-processes the result.
#
# Order of operations:
# 1. Pre-process @html in place (blank-line runs, space-only code spans).
# 2. Render through CustomMDRenderer.
# 3. Add data-md='true' to every opening tag of the rendered output.
# 4. Append a space after every http(s) URL left in the output.
#
# @return [String] the rendered HTML fragment
def formatted
  # Options passed to both the renderer and the Markdown parser.
  common_options = {
    strikethrough: true,
    hard_wrap: true,
    autolink: false,
    superscript: false,
    no_intra_emphasis: true,
    no_links: true,
    no_styles: true,
    no_images: true,
    filter_html: true,
    escape_html: true,
    safe_links_only: true,
    with_toc_data: true,
    xhtml: false,
    prettify: true,
    link_attributes: true
  }

  renderer = CustomMDRenderer.new(
    common_options.merge(fenced_link: true, fenced_image: true)
  )
  parser = Redcarpet::Markdown.new(
    renderer,
    common_options.merge(space_after_headers: true)
  )

  text = @html
  # Works around problems with consecutive line breaks.
  text.gsub!(/\n[\n]+/, "\n \n")
  # A code span holding only half-width spaces breaks the HTML, so collapse it.
  text.gsub!(/`[ ]+`/, " ")

  rendered = parser.render(text)
  # TODO: recognise data-md="true" to tell users on other servers that
  # Markdown was used.
  rendered.gsub!(/(<\w+)([^>]*>)/) do
    tag = Regexp.last_match
    "#{tag[1]} data-md='true' #{tag[2]}"
  end
  # Push a space in after each URL: Mastodon's URL detection is loose, so it
  # is dealt with here on the Markdown side.
  rendered.gsub!(/(https?:\/\/[^<>"\[\]  ]+)/) { "#{Regexp.last_match(1)} " }
  rendered
end
class CustomMDRenderer < Redcarpet::Render::HTML
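# Basic flow of this renderer:
# 1. Remove URLs (if Mastodon recognises something as a URL here, the HTML almost always breaks).
# 2. Forbid Markdown content inside Markdown content (prevents invalid HTML output caused by unintended tag generation).
# 3. Finally, escape HTML-unsafe characters when the HTML is emitted.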
# Emits a paragraph as its bare text, with no wrapping element.
#
# @param text [String] paragraph contents
# @return [String] the contents with leading/trailing whitespace removed
def paragraph(text)
  text.strip
end
# Emits the markup for a hard line break.
#
# @return [String] a <br> tag
def linebreak
  '<br>'
end
# Wraps quoted content in a <blockquote> element.
#
# URL-like markers are neutralised with remove_url, then
# blockquote_markdown_escape strips the tags it targets before wrapping.
#
# @param quote [String] contents of the quote
# @return [String] the <blockquote> element
def block_quote(quote)
  without_urls = remove_url(quote)
  inner = blockquote_markdown_escape(without_urls)
  "<blockquote>#{inner.strip}</blockquote>"
end
# Emits a heading element followed by a newline.
#
# The text has URL markers removed, embedded tags stripped and is then
# entity-encoded before being wrapped.
#
# @param text [String] heading contents
# @param header_level [Integer, #to_s] level interpolated into the tag name
# @return [String] "<hN>...</hN>\n"
def header(text, header_level)
  tag = "h#{header_level}"
  body = encode(markdown_escape(remove_url(text)))
  "<#{tag}>#{body}</#{tag}>\n"
end
# Emits an inline <code> element.
#
# URL markers and "[" characters are removed from the code before it is
# entity-encoded.
#
# @param code [String] contents of the code span
# @return [String] the <code> element
def codespan(code)
  cleaned = escape_bbcode(remove_url(code))
  "<code>#{encode(cleaned)}</code>"
end
# Wraps rendered list items in the list container element.
#
# @param contents [String] already-rendered list items
# @param list_type [Symbol] :unordered maps to <ul>, :ordered to <ol>;
#   any other value is used verbatim as the tag name
# @return [String] the list element with its contents stripped
def list(contents, list_type)
  tag =
    case list_type
    when :unordered then 'ul'
    when :ordered then 'ol'
    else list_type
    end
  "<#{tag}>#{contents.strip}</#{tag}>"
end
# Emits a single <li> element.
#
# @param text [String] item contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @param list_type [Symbol] not used by this method
# @return [String] the <li> element
def list_item(text, list_type)
  plain = markdown_escape(remove_url(text))
  "<li>#{encode(plain)}</li>"
end
# Renders emphasised text. Note that the output element is <sup>, not <em>.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <sup> element
def emphasis(text)
  safe_text = encode(markdown_escape(remove_url(text)))
  "<sup>#{safe_text}</sup>"
end
# Renders double-emphasised text. Note that the output element is <sub>.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <sub> element
def double_emphasis(text)
  stripped = markdown_escape(remove_url(text))
  ['<sub>', encode(stripped), '</sub>'].join
end
# Renders triple-emphasised text as a <small> element.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <small> element
def triple_emphasis(text)
  no_urls = remove_url(text)
  no_tags = markdown_escape(no_urls)
  "<small>#{encode(no_tags)}</small>"
end
# Renders struck-through text as an <s> element.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <s> element
def strikethrough(text)
  inner = encode(markdown_escape(remove_url(text)))
  "<s>#{inner}</s>"
end
# Renders superscript text as a <sup> element.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <sup> element
def superscript(text)
  cleaned = markdown_escape(remove_url(text))
  encoded = encode(cleaned)
  "<sup>#{encoded}</sup>"
end
# Renders underlined text as a <u> element.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <u> element
def underline(text)
  body = encode(markdown_escape(remove_url(text)))
  "<u>#{body}</u>"
end
# Renders highlighted text as a <mark> element.
#
# @param text [String] span contents; URL markers are removed, embedded
#   tags stripped, and the result entity-encoded
# @return [String] the <mark> element
def highlight(text)
  without_urls = remove_url(text)
  without_tags = markdown_escape(without_urls)
  "<mark>#{encode(without_tags)}</mark>"
end
# Renders an autolinked URL as an anchor with a fixed label.
#
# Autolinking plays badly with Mastodon, so it is normally not used
# (#formatted passes autolink: false); this callback only matters if that
# option is ever switched on.
#
# Changes from the previous version:
# * the attribute was misspelled "herf", so no working link was produced;
# * the target is now handled like #link: only https URLs without
#   quotes, brackets, angle brackets or whitespace become anchors, and the
#   URL is percent-encoded so later processing does not see a bare URL
#   (MDLinkDecoder decodes it again).
#
# @param link [String] the detected link target
# @param link_type [Symbol] kind of autolink; not used
# @return [String] an <a> element, or just the label text when the target
#   is not an acceptable https URL
def autolink(link, link_type)
  label = "リンク"
  return label unless link.to_s =~ /\Ahttps:\/\/[^<>"\[\][:space:]]+\z/

  %(<a href="#{URI.encode_www_form_component(link)}">#{label}</a>)
end
# Renders a Markdown image.
#
# When something other than an https URL is given as the target, only the
# (entity-encoded) alt text is shown so the output stays safe as HTML.
#
# @param link [String] image URL
# @param title [String, nil] image title; not used
# @param alt_text [String, nil] alternative text; replaced by a fixed
#   placeholder when it contains <, >, ", [, ] or a space
# @return [String] an <img> element with a percent-encoded src, or the
#   encoded alt text
def image(link, title, alt_text)
  alt_text = "設定なし" if alt_text =~ /[<>"\[\]  ]+/

  if link.to_s =~ /\Ahttps:\/\/[^<>"\[\]  ]+\z/
    "<img src=\"#{URI.encode_www_form_component(link)}\">"
  else
    encode(alt_text).to_s
  end
end
# Renders a Markdown link.
#
# The label is replaced by a fixed placeholder when it is empty or contains
# <, >, ", [, ], a space, an http(s) scheme, "#" or "@". Targets that are
# not plain https URLs produce only the encoded label, without an anchor.
#
# @param link [String] link target
# @param title [String, nil] link title; not used
# @param content [String, nil] link label
# @return [String] an <a> element with a percent-encoded href, or the
#   encoded label alone
def link(link, title, content)
  suspicious = content =~ /([<>"\[\]  ]+|https?:\/\/|#|@)/
  content = "リンク" if suspicious || content !~ /.+/

  label = encode(content)
  if link.to_s =~ /\Ahttps:\/\/[^<>"\[\]  ]+\z/
    "<a href=\"#{URI.encode_www_form_component(link)}\">#{label}</a>"
  else
    label.to_s
  end
end
# --- Escaping helpers start here ---

# Escapes characters that are problematic in HTML by delegating to
# HTMLEntities#encode.
#
# @param html [String] text to encode
# @return [String] whatever HTMLEntities#encode returns for the text
def encode(html)
  coder = HTMLEntities.new
  coder.encode(html)
end
# Keeps URLs, mentions and hashtags from being generated inside Markdown
# content: "http://" / "https://" become "URL:", and every "@" and "#"
# is removed.
#
# @param string [String] text to neutralise
# @return [String] a new string; the argument is not modified
def remove_url(string)
  string.gsub(/https?:\/\//, "URL:").delete("@#")
end
# Disables BBCode inside code spans (a long-standing request) by removing
# every "[" character.
#
# @param string [String] code span contents
# @return [String] a new string without "["
def escape_bbcode(string)
  string.delete("[")
end
# Prevents Markdown from being expanded inside Markdown by removing every
# "<...>" tag from the text. Usable for everything except blockquotes.
#
# @param string [String] already-rendered inner content
# @return [String] a new string with the tags removed
def markdown_escape(string)
  string.gsub(/<[^>]+>/, "")
end
# Escaping for blockquote contents, intended to leave only blockquote tags
# in place: removes opening and closing tags whose name starts with one of
# the listed prefixes (a, img, code, h1-h6, sup, sub, small, ul, ol, li,
# hr, s, u, mark).
#
# @param string [String] rendered blockquote contents
# @return [String] a new string with the matching tags removed
def blockquote_markdown_escape(string)
  stripped_tags = /<([\/]?a[^>]*|[\/]?img[^>]*|[\/]?code[^>]*|[\/]?h[1-6][^>]*|[\/]?sup[^>]*|[\/]?sub[^>]*|[\/]?small[^>]*|[\/]?ul[^>]*|[\/]?ol[^>]*|[\/]?li[^>]*|[\/]?hr[^>]*|[\/]?s[^>]*|[\/]?u[^>]*|[\/]?mark[^>]*)>/
  string.gsub(stripped_tags, "")
end
# Scratch helper written while testing (per the original author's note).
# Replaces ', &, ", <, > and / with HTML entities.
#
# @param string [String] text to escape
# @return [String] a new, escaped string
def html_escape(string)
  replacements = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
    "'" => '&#x27;',
    "/" => '&#x2F;'
  }
  string.gsub(/['&\"<>\/]/) { |char| replacements[char] }
end
end
end
# Mechanism for taming URLs: turns the percent-encoded href/src values found
# on data-md tagged <a> and <img> elements back into plain URLs.
class MDLinkDecoder
  # @param html [String] HTML to post-process; a copy is stored
  def initialize(html)
    @html = html.dup
  end

  # Decodes the URLs of data-md images and links.
  #
  # Each matching <img> is rewritten with a decoded src and wrapped in an
  # <a> pointing at the same URL (the image's trailing attributes are
  # repeated on that anchor). Each matching <a> gets a decoded href.
  #
  # @return [String] the rewritten HTML
  def decode
    with_images = @html.gsub(/<img data-md='true'\s+src="([^"]+)"([^>]*)>/) do
      found = Regexp.last_match
      url = URI.decode_www_form_component(found[1])
      rest = found[2]
      %(<a href="#{url}"#{rest}><img data-md='true' src="#{url}"#{rest}></a>)
    end

    with_images.gsub(/<a data-md='true'\s+href="([^"]+)"([^>]*)>/) do
      found = Regexp.last_match
      %(<a data-md='true' href="#{URI.decode_www_form_component(found[1])}"#{found[2]}>)
    end
  end
end
# Finds the HTML elements generated by the Markdown renderer (recognised by
# their data-md attribute) and reports their positions as entities flagged
# :markdown, so the caller can treat those spans specially instead of
# escaping them.
class MDExtractor
  # @param html [String] HTML to scan; a copy is stored
  def initialize(html)
    @html = html.dup
  end

  # Collects entities for every supported tag.
  #
  # @return [Array<Hash>] hashes of the form
  #   { markdown: true, indices: [begin, end] }, grouped by tag in the order
  #   listed below (not sorted by position)
  def extractEntities
    # Each entry is the argument list for extractByHTMLTagName.
    tag_specs = [
      ["h1"], ["h2"], ["h3"], ["h4"], ["h5"], ["h6"],
      ["em"], ["sup"], ["sub"], ["small"], ["u"], ["strong"],
      ["ul", false, false, "li"],
      ["ol", false, false, "li"],
      ["code"],
      ["blockquote", false],
      ["hr", false, true],
      ["br", false, true],
      ["a"],
      ["img", false, true],
      ["s"]
    ]
    tag_specs.flat_map { |spec| extractByHTMLTagName(*spec) }.compact
  end

  # Finds every occurrence of one tag.
  #
  # Offsets are taken from the core MatchData#offset, which counts
  # characters. The previous implementation called MatchData#char_begin /
  # #char_end, which are not part of Ruby itself and only worked when
  # another library had patched them in.
  #
  # @param tagName [String] element name to look for
  # @param isNoNest [Boolean] match lazily up to the first closing tag
  # @param isSingle [Boolean] the element has no closing tag
  # @param itemTagName [String, nil] child element allowed inside an
  #   outermost match (e.g. "li" for lists)
  # @return [Array<Hash>] one entity per match
  def extractByHTMLTagName(tagName, isNoNest = true, isSingle = false, itemTagName = nil)
    pattern = htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)
    entities = []
    @html.to_s.scan(pattern) do
      entities << { markdown: true, indices: Regexp.last_match.offset(0) }
    end
    entities
  end

  # Chooses the pattern for a tag. Precedence: single, then no-nest, then
  # outermost-with-item (when an item tag is given), then plain outermost.
  #
  # @return [Regexp]
  def htmlTagPatternByCond(tagName, isNoNest, isSingle, itemTagName)
    if isSingle
      htmlTagPatternSingle(tagName)
    elsif isNoNest
      htmlTagPatternNoNest(tagName)
    elsif itemTagName && !itemTagName.empty?
      htmlTagPatternOuterMostWithItem(tagName, itemTagName)
    else
      htmlTagPatternOuterMost(tagName)
    end
  end

  # General pattern for a tag and its same-named descendants.
  # Not called by any other method of this class.
  #
  # @return [Regexp]
  def htmlTagPattern(tagName)
    Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<]|<#{tagName} data-md=[^>]*>|<\\/#{tagName}>)*(?:<\\/#{tagName}>)*")
  end

  # Opening tag, then the shortest possible content (newlines included) up
  # to the first closing tag.
  #
  # @return [Regexp]
  def htmlTagPatternNoNest(tagName)
    Regexp.compile("<#{tagName} data-md=[^>]*>(?:.|\n)*?<\\/#{tagName}>")
  end

  # Opening tag only, for elements without a closing tag.
  #
  # @return [Regexp]
  def htmlTagPatternSingle(tagName)
    Regexp.compile("<#{tagName} data-md=[^>]*>")
  end

  # Outermost element of a possibly self-nested tag, using a recursive
  # sub-expression call.
  # https://stackoverflow.com/questions/546433/regular-expression-to-match-outer-brackets
  #
  # @return [Regexp]
  def htmlTagPatternOuterMost(tagName)
    Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|(\\g<0>))*<\/#{tagName}>")
  end

  # Same as htmlTagPatternOuterMost, but additionally allows the item tag's
  # opening and closing tags inside the element.
  #
  # @return [Regexp]
  def htmlTagPatternOuterMostWithItem(tagName, itemTagName)
    Regexp.compile("<#{tagName} data-md=[^>]*>(?:[^<>]|<#{itemTagName} data-md=[^>]*>|<\\/#{itemTagName}>|(\\g<0>))*<\/#{tagName}>")
  end
end