Last active
April 26, 2018 22:05
-
-
Save endrit-b/f3c9d727db8de8f08216e104ddd7ea94 to your computer and use it in GitHub Desktop.
ElasticSearch 5.x - A practical overview of ES features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root endpoint: a quick check that the cluster answers requests
GET /

# Index a single document with id 1 into my-index (type my-doc)
POST /my-index/my-doc/1
{
  "body": "foo"
}

# Search the same index/type the document was written to.
# The indexed body is "foo", so a match on "bar" is expected to return no hits.
GET /my-index/my-doc/_search
{
  "query": {
    "match": {
      "body": "bar"
    }
  }
}
PUT /library | |
{ | |
"settings": { | |
"index.number_of_shards": 1, | |
"index.number_of_replicas": 0 | |
} | |
} | |
#__________________________ | |
# | |
# Bulk indexing and Search | |
# | |
#__________________________ | |
# | |
# When you have a lot of docs to index you should
# use the bulk API of ES
POST /library/books/_bulk | |
{"index": {"_id": 1}} | |
{"title": "The quick brown fox", "price": 5, "colors": ["red", "green", "blue"]} | |
{"index": {"_id": 2}} | |
{"title": "The quick brown fox jumps over the lazy dog", "price": 15, "colors": ["blue", "yellow"]} | |
{"index": {"_id": 3}} | |
{"title": "The quick brown fox jumps over the lazy dog", "price": 8, "colors": ["red", "blue"]} | |
{"index": {"_id": 4}} | |
{"title": "Brown fox brown dog", "price": 2, "colors": ["black", "yellow", "red", "blue"]} | |
{"index": {"_id": 5}} | |
{"title": "Lazy dog", "price": 9, "colors": ["red", "blue", "green"]} | |
GET library/books/_search | |
GET library/books/_search | |
{ | |
"query": { | |
"match": { | |
"title": "fox" | |
} | |
} | |
} | |
# How about 'quick' and 'dog'? | |
GET library/books/_search | |
{ | |
"query": { | |
"match": { | |
"title": "quick dog" | |
} | |
} | |
} | |
# let's be more strict when we search | |
GET library/books/_search | |
{ | |
"query": { | |
"match_phrase": { | |
"title": "quick brown" | |
} | |
} | |
} | |
# Results are ranked based on relevance score (_score) | |
GET library/books/_search | |
{ | |
"query": { | |
"match": { | |
"title": "quick" | |
} | |
} | |
} | |
#_____________________ | |
# We can also do some boolean query combination | |
# | |
# Let's find all docs with "quick" and "lazy dog" | |
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"title": "quick" | |
} | |
}, | |
{ | |
"match_phrase": { | |
"title": "lazy dog" | |
} | |
} | |
] | |
} | |
} | |
} | |
#___________________________________________ | |
# or we can negate the query clause | |
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"must_not": [ | |
{ | |
"match": { | |
"title": "lazy" | |
} | |
}, | |
{ | |
"match_phrase": { | |
"title": "quick dog" | |
} | |
} | |
] | |
} | |
} | |
} | |
#___________________________________________________ | |
# Combinations can be boosted for different effects | |
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"should": [ | |
{ | |
"match": { | |
"title": { | |
"query": "quick dog" | |
} | |
} | |
}, | |
{ | |
"match_phrase": { | |
"title": { | |
"query": "lazy dog", | |
"boost": 3 | |
} | |
} | |
} | |
] | |
} | |
} | |
} | |
#__________________________ | |
# Sometimes, for better distinction in the UI side | |
# we tend to highlight the matched terms | |
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"should": [ | |
{ | |
"match": { | |
"title": { | |
"query": "quick dog" | |
} | |
} | |
}, | |
{ | |
"match_phrase": { | |
"title": { | |
"query": "lazy dog", | |
"boost": 3 | |
} | |
} | |
} | |
] | |
} | |
}, | |
"highlight": { | |
"fields": { | |
"title": {} | |
} | |
} | |
} | |
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-highlighting.html | |
#___________________________________ | |
# We can also perform filtering | |
# Filtering is faster than querying
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ | |
"match": { | |
"title": "dog" | |
} | |
} | |
], | |
"filter": { | |
"range": { | |
"price": { | |
"gte": 5, | |
"lte": 10 | |
} | |
} | |
} | |
} | |
} | |
} | |
# Filtering can be applied without query clause | |
GET /library/books/_search | |
{ | |
"query": { | |
"bool": { | |
"filter": { | |
"range": { | |
"price": { | |
"gte": 5 | |
} | |
} | |
} | |
} | |
} | |
} | |
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html | |
#_________________________________________________________ | |
# How did that work? | |
# We need to know and understand how to tune Elasticsearch,
# in order to make it search in a "managed" manner | |
# _analysis | |
# Analysis = tokenization + token filters | |
GET /library/_analyze | |
{ | |
"tokenizer": "standard", | |
"text": "Brown fox brown dog" | |
} | |
# Token filters can manipulate these tokens
GET /library/_analyze | |
{ | |
"tokenizer": "standard", | |
"filter": ["lowercase"], | |
"text": "Brown fox brown dog" | |
} | |
# There is a wide array of token filters
GET /library/_analyze | |
{ | |
"tokenizer": "standard", | |
"filter": ["lowercase", "unique"], | |
"text": "Brown fox brown dog" | |
} | |
#___________________________________________________ | |
# A tokenizer + 0 or more token filters = Analyzers | |
GET /library/_analyze | |
{ | |
"analyzer": "standard", | |
"text": "Brown fox brown dog" | |
} | |
#___________________________________________________ | |
# Understanding analysis is very important, because
# the emitted tokens can significantly change the results;
# choosing the right analysis helps your queries be more relevant
GET /library/_analyze | |
{ | |
"tokenizer": "standard", | |
"filter": ["lowercase"], | |
"text": "ThE quick.brown_FOx Jumped! $19.95 @ 3.0" | |
} | |
GET /library/_analyze | |
{ | |
"tokenizer": "letter", | |
"filter": ["lowercase"], | |
"text": "ThE quick.brown_FOx Jumped! $19.95 @ 3.0" | |
} | |
# Another example is the uax_url_email tokenizer: run the SAME text through
# the standard tokenizer and through uax_url_email to compare how the e-mail
# address and the URL are split into tokens
GET /library/_analyze
{
  "tokenizer": "standard",
  "text": "example@example.com website: https://elastic.co"
}

GET /library/_analyze
{
  "tokenizer": "uax_url_email",
  "text": "example@example.com website: https://elastic.co"
}
# More info on: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/analysis.html | |
#_____________________________________ | |
# Aggregations can be used to explore | |
# and extract insights from your data | |
GET /library/books/_search | |
{ | |
"size": 0, | |
"aggs": { | |
"popular-colors": { | |
"terms": { | |
"field": "colors.keyword" | |
} | |
} | |
} | |
} | |
# We can use aggs alongside a search query
GET /library/books/_search | |
{ | |
"query": { | |
"match": { | |
"title": "dog" | |
} | |
}, | |
"aggs": { | |
"popular-colors": { | |
"terms": { | |
"field": "colors.keyword" | |
} | |
} | |
} | |
} | |
# Aggregations can be nested, and be used to perform calculations | |
GET /library/books/_search | |
{ | |
"size": 0, | |
"aggs": { | |
"popular-colors": { | |
"terms": { | |
"field": "colors.keyword" | |
}, | |
"aggs": { | |
"avg-price-per-color": { | |
"avg": { | |
"field": "price" | |
} | |
} | |
} | |
} | |
} | |
} | |
# Documents can be updated at any time by re-indexing them | |
POST /library/books/4 | |
{ | |
"title": "The Brown fox and a brown dog", | |
"price": 12, | |
"colors": ["black", "red", "blue"] | |
} | |
# or by using the _update API, for partial updates | |
POST /library/books/4/_update | |
{ | |
"doc": { | |
"price": 6 | |
} | |
} | |
GET /library/books/4 | |
#____________________________________________________ | |
# Elasticsearch dynamically defines the index schema (mapping)
# when documents are indexed - it tries to infer the data types
GET /library/books/_mapping | |
# We can define the mapping (schema) when we create the index.
# The settings register a custom analyzer (uax_url_email tokenizer + lowercase
# filter) that the "description" field below refers to by name.
PUT /famous-librarians
{
  "settings": {
    "index": {
      "number_of_shards": 2,
      "number_of_replicas": 0,
      "analysis": {
        "analyzer": {
          "my-desc-analyzer": {
            "type": "custom",
            "tokenizer": "uax_url_email",
            "filter": ["lowercase"]
          }
        }
      }
    }
  },
  "mappings": {
    "librarian": {
      "properties": {
        "name": {
          "type": "text"
        },
        "fave-colors": {
          "type": "keyword"
        },
        "birth-date": {
          "type": "date",
          "format": "year_month_day"
        },
        "hometown": {
          "type": "geo_point"
        },
        "description": {
          "type": "text",
          "analyzer": "my-desc-analyzer"
        }
      }
    }
  }
}

GET /famous-librarians/_mapping

# The field names must match the mapping above exactly; a field with a
# different name is not covered by the explicit mapping.
PUT /famous-librarians/librarian/1
{
  "name": "Filan Fisteku",
  "fave-colors": ["yellow", "black"],
  "birth-date": "1877-11-11",
  "hometown": {
    "lat": 32.456478,
    "lon": -87.421111
  },
  "description": "Pioneered the establishment of libraries in Balkans - filan.fisteku@fiction.com"
}

GET /famous-librarians/librarian/1
# be careful when you do this: deleting an index removes all of its documents.
# my-index is the index created by the very first POST of this walkthrough.
DELETE /my-index
DELETE /library
DELETE /famous-librarians
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment