Skip to content

Instantly share code, notes, and snippets.

@kleem
Last active August 29, 2015 14:02
Show Gist options
  • Save kleem/c8bed17528993f1e0dba to your computer and use it in GitHub Desktop.
Save kleem/c8bed17528993f1e0dba to your computer and use it in GitHub Desktop.
OpeNER - Text annotation visualization
# example reviews
# 52a73a9fae9eef5a506472b9 "Ancienne cours..."
# 533eccf9ae9eef521e6292b1 "Nieuw Dakota..."
# 533eccf9ae9eef521e6294d3 "La peor discoteca..."
# Load one review, render its tokens as <span>/<ruby> elements inside #text and
# draw the matching annotations (underlines, part-of-speech symbols, halos)
# into the #annotations svg.
# d3 v3 invokes a two-argument callback with (error, data), so a failed request
# can be detected instead of crashing on a null payload.
d3.json "http://tour-pedia.org/api/getReviewDetails?id=533eccf9ae9eef521e6292b1", (error, kaf_data) ->
  # console.log kaf_data

  terms = kaf_data?.analysis?.json?.terms
  if error? or not terms?
    console.error 'Could not load the review annotations', error
    return

  ### convert to VAnn format ###
  # entity_map = objectify kaf_data.analysis.json.entities

  data = []
  for tid, term of terms
    token = {text: term.text}

    if term.lemma?
      token.lemma = term.lemma

    if term.pos?
      # the resulting names match the ids of the #pos_* symbols referenced below
      token.pos = switch term.pos
        when 'N' then 'noun'
        when 'R' then 'noun' # proper noun!
        when 'V' then 'verb'
        when 'G' then 'adjective'
        when 'A' then 'adverb'
        when 'Q' then 'pronoun' # check
        when 'P' then 'preposition'
        when 'D' then 'determiner'
        when 'C' then 'conjunction'
        # when 'O' then 'other'
        else undefined

      # skip 'other' parts of speech
      if term.pos is 'O'
        token.skip = true

    if term.sentiment?
      if term.sentiment.polarity?
        token.polarity = term.sentiment.polarity
      else if term.sentiment.sentiment_modifier?
        token.sentiment_modifier = term.sentiment.sentiment_modifier

    # if term.entity?
    #   token.netype = entity_map[term.entity].type

    if term.pos? and term.pos is 'R'
      # proper noun
      token.proper = true

    data.push token
    # every token is followed by a skipped spacer token
    data.push {text: ' ', skip: true}

  elems = d3.select('#text').selectAll('span')
    .data(data)
    .enter().append('span')

  # NOTE(review): token text comes from the remote response and is inserted as
  # markup by the two .html() calls below - confirm the API output is trusted
  # or sanitize it before rendering.
  elems.filter((d) -> d.skip? and d.skip)
    .html((d) -> d.text)

  rubys = elems.filter((d) -> not d.skip? or not d.skip).append('ruby')

  rubys.append('rb')
    .html((d) -> d.text) # html (rather than text) is used so that entities such as &nbsp; are rendered

  ### lemma ###
  rubys.filter((d) -> d.lemma?).append('rt')
    .attr('class', 'lemma')
    .text((d) -> d.lemma)

  ### store textual representations into data ###
  elems.each (d) ->
    d.elem = this

  ### VISUALIZATION ###
  svg = d3.select('#annotations')

  ### SVG lemma
  lemmas = svg.selectAll('.lemma')
    .data(data.filter((d) -> d.lemma?))
    .enter().append('text')
    .attr('class', 'lemma')
    .text((d) -> d.lemma) ###

  ### proper noun halo ###
  proper_r = 9
  propers = svg.selectAll('.proper')
    .data(data.filter((d) -> d.proper? and d.proper))
    .enter().append('circle')
    .attr('class', 'proper')
    .attr('r', proper_r)

  propers.append('title')
    .text((d) -> (if d.proper? and d.proper then 'proper ' else '') + d.pos)

  ### pos ###
  poss = svg.selectAll('.pos')
    .data(data.filter((d) -> d.pos?))
    .enter().append('use')
    .attr('class', 'pos')
    .attr('xlink:href', (d) -> "#pos_#{d.pos}")

  poss.append('title')
    .text((d) -> (if d.proper? and d.proper then 'proper ' else '') + d.pos)

  ### named entity halo ###
  nes = svg.selectAll('.ne')
    .data(data.filter((d) -> d.netype?))
    .enter().append('circle')
    .attr('class', 'ne')

  ### normal token underline ###
  tokens = svg.selectAll('.token')
    .data(data.filter((d) -> not d.skip))
    .enter().append('rect')
    .attr('class', 'token')

  ### polarity underline (positive, negative or neutral) ###
  polarities = svg.selectAll('.polarity')
    .data(data.filter((d) -> d.polarity? and d.polarity in ['positive','negative','neutral']))
    .enter().append('path')
    .attr('class', 'polarity')

  polarities.append('title')
    .text((d) -> "#{d.polarity} polarity")

  ### sentiment modifier underline (intensifier or weakener) ###
  senmods = svg.selectAll('.senmod')
    .data(data.filter((d) -> d.sentiment_modifier? and d.sentiment_modifier in ['intensifier','weakener']))
    .enter().append('path')
    .attr('class', 'senmod')

  senmods.append('title')
    .text((d) -> d.sentiment_modifier)

  ### visualization parameters ###
  gap = 0 # distance between token underlines
  dist = 1 # distance between text and token underlines
  th = 1 # thickness of token and polarity underlines
  ldist = 10 # distance between token underlines and lemma baselines
  pold = 22

  # pos symbol center (from underline bottom left corner)
  pos_dx = 4
  pos_dy = 6

  # parameters that control the curvature of the polarity underline
  xc = 2
  yc = 12

  neradius = 22 # radius of ne halos
  necolor = d3.scale.ordinal()
    .domain(['person','location','date','organization','misc'])
    .range(['#00A777','#F58020','#999','#00A1CF','#E08566'])

  ### redraw the annotations ###
  redraw = () ->
    ### adapt the annotation svg to the text div ###
    new_svg_bbox = d3.select('#text')[0][0].getBoundingClientRect()
    svg
      .attr('width', new_svg_bbox.width)
      .attr('height', new_svg_bbox.height)

    ### compute new bboxes ###
    # getBoundingClientRect() is relative to the viewport, while the shapes are
    # positioned in svg coordinates: measure every token relative to the svg so
    # that a redraw on a scrolled page does not shift the annotations.
    # A plain object is stored instead of writing onto the browser's rect.
    origin = svg.node().getBoundingClientRect()
    for d in data
      rect = d.elem.getBoundingClientRect()
      d.bbox =
        left: rect.left - origin.left
        right: rect.right - origin.left
        top: rect.top - origin.top
        bottom: rect.bottom - origin.top
        width: rect.right - rect.left
        height: rect.bottom - rect.top

    tokens
      .attr('x', (d) -> d.bbox.left+gap/2)
      .attr('y', (d) -> d.bbox.bottom+dist)
      .attr('width', (d) -> d.bbox.width-gap)
      .attr('height', th)

    ###
    lemmas
      .attr('x', (d) -> d.bbox.left+d.bbox.width/2)
      .attr('y', (d) -> d.bbox.bottom+dist+th+ldist)###

    poss
      .attr('x', (d) -> d.bbox.left+gap+pos_dx)
      .attr('y', (d) -> d.bbox.bottom+dist+th+pos_dy)

    propers
      .attr('cx', (d) -> d.bbox.left+gap+pos_dx)
      .attr('cy', (d) -> d.bbox.bottom+dist+th+pos_dy)

    polarities
      .attr('d', (d) ->
        x1 = d.bbox.left+gap/2
        x2 = d.bbox.right-gap/2
        y = d.bbox.bottom+pold+dist-2*yc/3
        y_eq = d.bbox.bottom+pold+dist-2*th
        #y_eq2 = d.bbox.bottom+pold+dist+2
        if d.polarity is 'neutral'
          # flat bar of thickness th
          return "M#{x1} #{y_eq} L#{x2} #{y_eq} L#{x2} #{y_eq+th} L#{x1} #{y_eq+th}"
        else
          # curved band; negative polarity reuses it, mirrored by the transform below
          return "M#{x1} #{y} C#{x1+xc} #{y+yc} #{x2-xc} #{y+yc} #{x2} #{y} L#{x2} #{y+th} C#{x2-xc} #{y+th+yc} #{x1+xc} #{y+th+yc} #{x1} #{y+th} z"
      )

    # vertical mirror around y = bottom+pold+dist-2
    polarities.filter((d) -> d.polarity is 'negative')
      .attr('transform', (d) -> "scale(1,-1) translate(0,#{-2*(d.bbox.bottom+pold+dist-2)})")

    senmods
      .attr('d', (d) ->
        x1 = d.bbox.left+gap/2
        x2 = d.bbox.right-gap/2
        yl = d.bbox.bottom+pold+dist-2*th+2
        yh = yl-5
        if d.sentiment_modifier is 'intensifier'
          # wedge rising towards the right
          return "M#{x1} #{yl} L#{x2} #{yh} L#{x2} #{yl} L#{x1} #{yl}"
        else
          # wedge falling towards the right
          return "M#{x1} #{yh} L#{x2} #{yl} L#{x2} #{yl} L#{x1} #{yl}"
      )

    nes
      .attr('cx', (d) -> d.bbox.left + d.bbox.width/2)
      .attr('cy', (d) -> d.bbox.top + d.bbox.height/2)
      .attr('r', neradius)
      .attr('fill', (d) -> necolor(d.netype))

  redraw()
  window.onresize = redraw
/* Page reset: the text layer and the annotation svg both start at the page origin. */
html, body {
  margin: 0;
  padding: 0;
  background: white;
}

/* Text layer; the tall line-height leaves room for the annotations under each line. */
#text {
  position: absolute;
  /* this is needed to have svg events work */
  pointer-events: none;
  line-height: 4em;
  /* generic fallback added so a serif face is used when Georgia is unavailable */
  font-family: Georgia, serif;
  font-size: 18px;
  /*text-align: justify;*/
  /* padding is used to make sure the svg fits */
  padding: 12px;
}

#text > span {
  padding-left: 1px;
  padding-right: 1px;
}

rb {
  /* this enables text selection */
  pointer-events: all;
  padding-bottom: 2px;
}

rt {
  padding-left: 16px;
  padding-right: 16px;
}

/* Annotation layer, drawn by the script. */
#annotations {
  position: absolute;
}

.token {
  fill: #999;
}

.lemma {
  font-size: 9px;
  font-family: sans-serif;
  text-anchor: middle;
  color: #999;
  text-align: center;
}

ruby {
  /* legacy values first; browsers that reject them fall through to the standard one */
  ruby-position: after;
  -webkit-ruby-position: after;
  /* standard keyword for placing the annotation below the base text */
  ruby-position: under;
}

.pos, .polarity, .senmod {
  fill: #2A9DC2;
}

.proper {
  fill: #555;
}

.ne, .proper {
  fill-opacity: 0.15;
}
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="description" content="OpeNER - Text annotation visualization" />
    <title>OpeNER - Text annotation visualization</title>
    <link rel="stylesheet" href="index.css">
    <!-- Libraries are loaded over https so they are not blocked as mixed content
         when this page itself is served over https. -->
    <script src="https://d3js.org/d3.v3.min.js"></script>
    <script src="https://d3js.org/queue.v1.min.js"></script>
  </head>
  <body>
    <!-- Annotation layer: index.js appends the shapes here; the <defs> hold one
         symbol per part of speech, referenced through <use xlink:href="#pos_..."> -->
    <svg id="annotations">
      <defs>
        <path id="pos_noun" d="m -3,-3 6,0 0,6 -6,0 z"/>
        <path id="pos_verb" d="M -3,-4 4,0 -3,4 z"/>
        <path id="pos_adjective" d="m -3,-3 0,6 6,0 0,-6 z m 2,2 2,0 0,2 -2,0 z"/>
        <path id="pos_adverb" d="M -3 -4 L -3 4 L 4 0 L -3 -4 z M -1.4375 -1.5 L 1.1875 0 L -1.4375 1.5 L -1.4375 -1.5 z"/>
        <path id="pos_pronoun" d="M -3 -3 L -3 -1 L -1 -1 L -1 -3 L -3 -3 z M 1 -3 L 1 -1 L 3 -1 L 3 -3 L 1 -3 z M -3 1 L -3 3 L -1 3 L -1 1 L -3 1 z M 1 1 L 1 3 L 3 3 L 3 1 L 1 1 z"/>
        <path id="pos_preposition" d="m -1,-6 0,5 2,0 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_determiner" d="m -1,-6 0,5 2,0 0,-5 z"/>
        <path id="pos_conjunction" d="m -1,-6 0,3 -2,0 0,2 2,0 0,2 2,0 0,-2 2,0 0,-2 -2,0 0,-3 z"/>
        <path id="pos_other" d="m -1,-6 0,2 2,0 0,-2 z"/>
      </defs>
    </svg>
    <!-- Text layer: index.js fills this with one <span> per token -->
    <div id="text"></div>
    <script src="index.js"></script>
  </body>
</html>
(function() {
  /*
   * Loads one review, renders its tokens as <span>/<ruby> elements inside
   * #text and draws the matching annotations (underlines, part-of-speech
   * symbols, halos) into the #annotations svg. The annotations are laid out
   * again whenever the window is resized.
   */
  var REVIEW_URL = "http://tour-pedia.org/api/getReviewDetails?id=533eccf9ae9eef521e6292b1";

  var hasOwn = Object.prototype.hasOwnProperty;

  /* Single-letter codes read from term.pos -> names matching the #pos_* symbol ids. */
  var POS_NAMES = {
    N: 'noun',
    R: 'noun', // proper noun, additionally flagged through token.proper
    V: 'verb',
    G: 'adjective',
    A: 'adverb',
    Q: 'pronoun',
    P: 'preposition',
    D: 'determiner',
    C: 'conjunction'
  };

  /*
   * Converts the terms of the response into the flat token list bound to the
   * DOM. Every term yields one token followed by a skipped spacer token.
   *
   * terms: object whose values carry text and, optionally, lemma, pos and sentiment.
   * Returns an array of token objects ({text, lemma?, pos?, skip?, polarity?,
   * sentiment_modifier?, proper?}).
   */
  function toTokens(terms) {
    var tokens = [];
    var tid, term, token;
    for (tid in terms) {
      term = terms[tid];
      token = {
        text: term.text
      };
      if (term.lemma != null) {
        token.lemma = term.lemma;
      }
      if (term.pos != null) {
        // unknown codes leave pos undefined, so no symbol is drawn for them
        token.pos = hasOwn.call(POS_NAMES, term.pos) ? POS_NAMES[term.pos] : void 0;
        // 'other' parts of speech are rendered as plain text without annotations
        if (term.pos === 'O') {
          token.skip = true;
        }
      }
      if (term.sentiment != null) {
        if (term.sentiment.polarity != null) {
          token.polarity = term.sentiment.polarity;
        } else if (term.sentiment.sentiment_modifier != null) {
          token.sentiment_modifier = term.sentiment.sentiment_modifier;
        }
      }
      if (term.pos === 'R') {
        token.proper = true;
      }
      tokens.push(token);
      tokens.push({
        text: ' ',
        skip: true
      });
    }
    return tokens;
  }

  /* Tooltip shared by the part-of-speech symbol and the proper-noun halo. */
  function posTitle(d) {
    return (d.proper ? 'proper ' : '') + d.pos;
  }

  // d3 v3 invokes a two-argument callback with (error, data); handling the
  // error avoids a TypeError on a null payload when the request fails.
  d3.json(REVIEW_URL, function(error, kaf_data) {
    var terms = kaf_data && kaf_data.analysis && kaf_data.analysis.json && kaf_data.analysis.json.terms;
    if (error || terms == null) {
      console.error('Could not load the review annotations', error);
      return;
    }

    /* convert to VAnn format */
    var data = toTokens(terms);

    var elems = d3.select('#text').selectAll('span').data(data).enter().append('span');

    // NOTE(review): token text comes from the remote response and is inserted
    // as markup by the two .html() calls below - confirm the API output is
    // trusted or sanitize it before rendering.
    elems.filter(function(d) {
      return Boolean(d.skip);
    }).html(function(d) {
      return d.text;
    });

    var rubys = elems.filter(function(d) {
      return !d.skip;
    }).append('ruby');

    rubys.append('rb').html(function(d) {
      return d.text;
    });

    /* lemma */
    rubys.filter(function(d) {
      return d.lemma != null;
    }).append('rt').attr('class', 'lemma').text(function(d) {
      return d.lemma;
    });

    /* store textual representations into data */
    elems.each(function(d) {
      d.elem = this;
    });

    /* VISUALIZATION */
    var svg = d3.select('#annotations');

    /* proper noun halo */
    var proper_r = 9;
    var propers = svg.selectAll('.proper').data(data.filter(function(d) {
      return Boolean(d.proper);
    })).enter().append('circle').attr('class', 'proper').attr('r', proper_r);
    propers.append('title').text(posTitle);

    /* pos */
    var poss = svg.selectAll('.pos').data(data.filter(function(d) {
      return d.pos != null;
    })).enter().append('use').attr('class', 'pos').attr('xlink:href', function(d) {
      return "#pos_" + d.pos;
    });
    poss.append('title').text(posTitle);

    /* named entity halo */
    var nes = svg.selectAll('.ne').data(data.filter(function(d) {
      return d.netype != null;
    })).enter().append('circle').attr('class', 'ne');

    /* normal token underline */
    var tokens = svg.selectAll('.token').data(data.filter(function(d) {
      return !d.skip;
    })).enter().append('rect').attr('class', 'token');

    /* polarity underline (positive, negative or neutral) */
    var polarities = svg.selectAll('.polarity').data(data.filter(function(d) {
      return ['positive', 'negative', 'neutral'].indexOf(d.polarity) !== -1;
    })).enter().append('path').attr('class', 'polarity');
    polarities.append('title').text(function(d) {
      return "" + d.polarity + " polarity";
    });

    /* sentiment modifier underline (intensifier or weakener) */
    var senmods = svg.selectAll('.senmod').data(data.filter(function(d) {
      return ['intensifier', 'weakener'].indexOf(d.sentiment_modifier) !== -1;
    })).enter().append('path').attr('class', 'senmod');
    senmods.append('title').text(function(d) {
      return d.sentiment_modifier;
    });

    /* visualization parameters */
    var gap = 0; // distance between token underlines
    var dist = 1; // distance between text and token underlines
    var th = 1; // thickness of token and polarity underlines
    var pold = 22;
    // pos symbol center (from underline bottom left corner)
    var pos_dx = 4;
    var pos_dy = 6;
    // parameters that control the curvature of the polarity underline
    var xc = 2;
    var yc = 12;
    var neradius = 22; // radius of ne halos
    var necolor = d3.scale.ordinal().domain(['person', 'location', 'date', 'organization', 'misc']).range(['#00A777', '#F58020', '#999', '#00A1CF', '#E08566']);

    /* redraw the annotations */
    var redraw = function() {
      /* adapt the annotation svg to the text div */
      var new_svg_bbox = d3.select('#text').node().getBoundingClientRect();
      svg.attr('width', new_svg_bbox.width).attr('height', new_svg_bbox.height);

      /* compute new bboxes */
      // getBoundingClientRect() is relative to the viewport, while the shapes
      // are positioned in svg coordinates: measure every token relative to the
      // svg so that a redraw on a scrolled page does not shift the annotations.
      // A plain object is stored instead of writing onto the browser's rect.
      var origin = svg.node().getBoundingClientRect();
      data.forEach(function(d) {
        var rect = d.elem.getBoundingClientRect();
        d.bbox = {
          left: rect.left - origin.left,
          right: rect.right - origin.left,
          top: rect.top - origin.top,
          bottom: rect.bottom - origin.top,
          width: rect.right - rect.left,
          height: rect.bottom - rect.top
        };
      });

      tokens.attr('x', function(d) {
        return d.bbox.left + gap / 2;
      }).attr('y', function(d) {
        return d.bbox.bottom + dist;
      }).attr('width', function(d) {
        return d.bbox.width - gap;
      }).attr('height', th);

      poss.attr('x', function(d) {
        return d.bbox.left + gap + pos_dx;
      }).attr('y', function(d) {
        return d.bbox.bottom + dist + th + pos_dy;
      });

      propers.attr('cx', function(d) {
        return d.bbox.left + gap + pos_dx;
      }).attr('cy', function(d) {
        return d.bbox.bottom + dist + th + pos_dy;
      });

      polarities.attr('d', function(d) {
        var x1 = d.bbox.left + gap / 2;
        var x2 = d.bbox.right - gap / 2;
        var y = d.bbox.bottom + pold + dist - 2 * yc / 3;
        var y_eq = d.bbox.bottom + pold + dist - 2 * th;
        if (d.polarity === 'neutral') {
          // flat bar of thickness th
          return "M" + x1 + " " + y_eq + " L" + x2 + " " + y_eq + " L" + x2 + " " + (y_eq + th) + " L" + x1 + " " + (y_eq + th);
        }
        // curved band; negative polarity reuses it, mirrored by the transform below
        return "M" + x1 + " " + y + " C" + (x1 + xc) + " " + (y + yc) + " " + (x2 - xc) + " " + (y + yc) + " " + x2 + " " + y + " L" + x2 + " " + (y + th) + " C" + (x2 - xc) + " " + (y + th + yc) + " " + (x1 + xc) + " " + (y + th + yc) + " " + x1 + " " + (y + th) + " z";
      });

      // vertical mirror around y = bottom + pold + dist - 2
      polarities.filter(function(d) {
        return d.polarity === 'negative';
      }).attr('transform', function(d) {
        return "scale(1,-1) translate(0," + (-2 * (d.bbox.bottom + pold + dist - 2)) + ")";
      });

      senmods.attr('d', function(d) {
        var x1 = d.bbox.left + gap / 2;
        var x2 = d.bbox.right - gap / 2;
        var yl = d.bbox.bottom + pold + dist - 2 * th + 2;
        var yh = yl - 5;
        if (d.sentiment_modifier === 'intensifier') {
          // wedge rising towards the right
          return "M" + x1 + " " + yl + " L" + x2 + " " + yh + " L" + x2 + " " + yl + " L" + x1 + " " + yl;
        }
        // wedge falling towards the right
        return "M" + x1 + " " + yh + " L" + x2 + " " + yl + " L" + x2 + " " + yl + " L" + x1 + " " + yl;
      });

      nes.attr('cx', function(d) {
        return d.bbox.left + d.bbox.width / 2;
      }).attr('cy', function(d) {
        return d.bbox.top + d.bbox.height / 2;
      }).attr('r', neradius).attr('fill', function(d) {
        return necolor(d.netype);
      });
    };

    redraw();
    window.onresize = redraw;
  });
}).call(this);
@nitaku
Copy link

nitaku commented Jun 15, 2014

This experiment needs a readme! :D

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment