HTML коварен, потому что в нём много тегов и Яндекс считает нормальным придавать этим тегам разный смысл при разборе документов.
Разные теги не только несут разную смысловую нагрузку, но и просто портят слова (склеивают их, обрезают предложения, пропускают смешение алфавитов и т. п.).
Поэтому нам нужна такая табличка, чтобы быть настороже и помнить. Мы не должны забывать. И тогда, однажды…
Начало тут: webimho.ru.
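Описание полей

- tag — имя тега; запись со слешем (например, /a) обозначает закрывающий тег;
- replace — чем тег заменяется при разборе: space — пробелом, passage — по-видимому, границей пассажа (предложения), empty — ничем (соседние слова склеиваются);
- crop — 0 или 1; судя по набору тегов со значением 1 (script, style, iframe и т. п.), единица означает, что содержимое тега вырезается (требует уточнения).
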
tag | replace | crop |
---|---|---|
a | space | 0 |
/a | space | 0 |
abbr | space | 0 |
/abbr | space | 0 |
acronym | space | 0 |
/acronym | space | 0 |
address | passage | 0 |
/address | passage | 0 |
applet | space | 1 |
/applet | space | 1 |
/area | empty | 0 |
area | space | 0 |
article | passage | 0 |
/article | passage | 0 |
aside | passage | 0 |
/aside | passage | 0 |
audio | passage | 0 |
/audio | passage | 0 |
b | space | 0 |
/b | space | 0 |
/base | empty | 0 |
base | passage | 0 |
/basefont | empty | 0 |
basefont | space | 0 |
bdi | space | 0 |
/bdi | space | 0 |
bdo | space | 0 |
/bdo | space | 0 |
/bgsound | empty | 0 |
bgsound | space | 0 |
big | space | 0 |
/big | space | 0 |
blink | space | 0 |
/blink | space | 0 |
blockquote | passage | 0 |
/blockquote | passage | 0 |
body | empty | 0 |
/body | empty | 0 |
br | space | 0 |
/br | space | 0 |
button | passage | 0 |
/button | passage | 0 |
canvas | passage | 0 |
/canvas | passage | 0 |
caption | empty | 0 |
/caption | empty | 0 |
center | passage | 0 |
/center | passage | 0 |
cite | space | 0 |
/cite | space | 0 |
code | space | 0 |
/code | space | 0 |
col | empty | 0 |
/col | empty | 0 |
colgroup | empty | 0 |
/colgroup | empty | 0 |
/command | empty | 0 |
command | space | 0 |
comment | passage | 0 |
/comment | passage | 0 |
datalist | passage | 0 |
/datalist | passage | 0 |
dd | passage | 0 |
/dd | passage | 0 |
del | space | 0 |
/del | space | 0 |
details | passage | 0 |
/details | space | 0 |
dfn | space | 0 |
/dfn | space | 0 |
dir | passage | 0 |
/dir | passage | 0 |
div | passage | 0 |
/div | passage | 0 |
/dl | passage | 0 |
dl | space | 0 |
dt | passage | 0 |
/dt | passage | 0 |
em | space | 0 |
/em | space | 0 |
/embed | empty | 0 |
embed | space | 0 |
fieldset | passage | 0 |
/fieldset | passage | 0 |
figcaption | passage | 0 |
/figcaption | passage | 0 |
figure | passage | 0 |
/figure | passage | 0 |
font | space | 0 |
/font | space | 0 |
footer | passage | 0 |
/footer | passage | 0 |
form | passage | 0 |
/form | passage | 0 |
frame | empty | 0 |
/frame | empty | 0 |
frameset | empty | 0 |
/frameset | empty | 0 |
h1 | passage | 0 |
/h1 | passage | 0 |
h2 | passage | 0 |
/h2 | passage | 0 |
h3 | passage | 0 |
/h3 | passage | 0 |
h4 | passage | 0 |
/h4 | passage | 0 |
h5 | passage | 0 |
/h5 | passage | 0 |
h6 | passage | 0 |
/h6 | passage | 0 |
head | empty | 0 |
/head | empty | 0 |
header | passage | 0 |
/header | passage | 0 |
hgroup | passage | 0 |
/hgroup | passage | 0 |
/hr | empty | 0 |
hr | passage | 0 |
html | empty | 0 |
/html | empty | 0 |
i | space | 0 |
/i | space | 0 |
iframe | passage | 1 |
/iframe | passage | 1 |
/img | empty | 0 |
img | space | 0 |
/input | empty | 0 |
input | space | 0 |
ins | space | 0 |
/ins | space | 0 |
isindex | passage | 0 |
/isindex | space | 0 |
kbd | space | 0 |
/kbd | space | 0 |
/keygen | empty | 0 |
keygen | space | 0 |
label | space | 0 |
/label | space | 0 |
legend | passage | 0 |
/legend | passage | 0 |
li | passage | 0 |
/li | passage | 0 |
/link | empty | 0 |
link | passage | 0 |
main | passage | 0 |
/main | passage | 0 |
map | space | 0 |
/map | space | 0 |
mark | space | 0 |
/mark | space | 0 |
marquee | passage | 1 |
/marquee | passage | 1 |
menu | passage | 0 |
/menu | passage | 0 |
/meta | empty | 0 |
meta | space | 0 |
meter | space | 0 |
/meter | space | 0 |
nav | passage | 0 |
/nav | passage | 0 |
nobr | space | 0 |
/nobr | space | 0 |
noembed | passage | 1 |
/noembed | passage | 1 |
noframes | passage | 1 |
/noframes | passage | 1 |
object | space | 1 |
/object | space | 1 |
ol | passage | 0 |
/ol | passage | 0 |
optgroup | space | 0 |
/optgroup | space | 0 |
option | space | 0 |
/option | space | 0 |
output | space | 0 |
/output | space | 0 |
p | passage | 0 |
/p | passage | 0 |
/param | empty | 0 |
param | space | 0 |
plaintext | passage | 0 |
/plaintext | passage | 0 |
pre | passage | 0 |
/pre | passage | 0 |
progress | space | 0 |
/progress | space | 0 |
q | space | 0 |
/q | space | 0 |
rp | space | 0 |
/rp | space | 0 |
rt | space | 0 |
/rt | space | 0 |
ruby | space | 0 |
/ruby | space | 0 |
s | space | 0 |
/s | space | 0 |
samp | space | 0 |
/samp | space | 0 |
script | passage | 1 |
/script | passage | 1 |
section | passage | 0 |
/section | passage | 0 |
select | passage | 0 |
/select | passage | 0 |
small | space | 0 |
/small | space | 0 |
/source | empty | 0 |
source | space | 0 |
span | space | 0 |
/span | space | 0 |
strike | space | 0 |
/strike | space | 0 |
strong | space | 0 |
/strong | space | 0 |
style | passage | 1 |
/style | passage | 1 |
sub | space | 0 |
/sub | space | 0 |
summary | passage | 0 |
/summary | passage | 0 |
sup | space | 0 |
/sup | space | 0 |
table | empty | 0 |
/table | passage | 0 |
tbody | empty | 0 |
/tbody | empty | 0 |
td | empty | 0 |
/td | empty | 0 |
textarea | passage | 0 |
/textarea | passage | 0 |
tfoot | empty | 0 |
/tfoot | empty | 0 |
th | empty | 0 |
/th | empty | 0 |
thead | empty | 0 |
/thead | empty | 0 |
time | space | 0 |
/time | space | 0 |
title | passage | 0 |
/title | passage | 0 |
tr | empty | 0 |
/tr | empty | 0 |
tt | space | 0 |
/tt | space | 0 |
u | space | 0 |
/u | space | 0 |
ul | passage | 0 |
/ul | passage | 0 |
var | space | 0 |
/var | space | 0 |
video | passage | 0 |
/video | passage | 0 |
/wbr | empty | 0 |
wbr | space | 0 |
xmp | passage | 0 |
/xmp | passage | 0 |