Skip to content

Instantly share code, notes, and snippets.

@ijharulislam
Last active March 9, 2018 16:13
Show Gist options
  • Save ijharulislam/5575566328a547506f0f8483b06ee348 to your computer and use it in GitHub Desktop.
Save ijharulislam/5575566328a547506f0f8483b06ee348 to your computer and use it in GitHub Desktop.
# Docomo
# Extraction settings for NTT Docomo shop pages (the consumer is not shown here).
#   "format"       - one entry per extracted item (name, zip, address, tel);
#                    each pairs an XPath with an optional regex.
#   "coordination" - a regex with two capture groups plus the lat/lon templates
#                    that refer to those groups.
#   "target"       - the pages this configuration applies to, each with an id.
# Entries without a regex carry the string "Null" (not None); presumably the
# consumer treats that as "no regex" -- confirm.
# Review fixes in this block:
#   * zip "regex_item" was "${1} " with a stray trailing space.
#   * "lat"/"lon" were "$1"/"$2"; normalized to the "${N}" form that every
#     other configuration in this file uses.
#   * "latlon_xpath" was single-quoted; it is now double-quoted so the literal
#     is also valid JSON (the string value itself is unchanged).
input_data = {
    "format": [
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "item_name": "name",
            "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[1]/h2"
        },
        {
            "regex_item": "${1}",
            "regex_match": "([\\d\\-\\\u2212\\\u2010]+)",
            "remove_tag": "1",
            "item_name": "zip",
            "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/text()[1]"
        },
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "item_name": "address",
            "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/text()[2]"
        },
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "item_name": "tel",
            "xpath": "/html/body/div[2]/div[3]/div[1]/main/article/section[1]/div[2]/div[1]/div/div[2]/figure/table/tbody/tr[1]/td/span[1]"
        }
    ],
    "coordination": [
        {
            "srid": "",
            "regex_match": "ll=([\\-\\d\\.]+),([\\-\\d\\.]+).+",
            "latlon_xpath": "//*[@id=\"map\"]/div[2]/div/div[2]/a",
            "mapurl_regex": "",
            "tinymapurl_regex": "",
            "lat": "${1}",
            "mapurl_xpath": "",
            "lon": "${2}"
        }
    ],
    "target": [
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E5%AE%97%E8%B0%B7%E5%9C%B0%E6%96%B9&sc=area&pg=1&id=0135130101200&map=g&ot=s&p=012", "id": 1},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E8%95%A8%E5%B8%82&sc=area&pg=1&id=0354124100300&map=g&ot=s&p=110", "id": 2},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E5%9B%9B%E8%A1%97%E9%81%93%E5%B8%82&sc=area&pg=1&id=0300305415600&map=g&ot=s&p=120", "id": 3},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%A8%AA%E9%A0%88%E8%B3%80%E5%B8%82&sc=area&pg=1&id=0300302070400&map=g&ot=s&p=142", "id": 4},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%AD%A6%E8%94%B5%E6%9D%91%E5%B1%B1%E5%B8%82&sc=area&pg=1&id=0300306946200&map=g&ot=s&p=132", "id": 5},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%9C%AC%E5%AE%AE%E5%B8%82&sc=area&pg=1&id=0200201086000&map=g&ot=s&p=070", "id": 6},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E7%B1%B3%E6%B2%A2%E5%B8%82&sc=area&pg=1&id=0200200553700&map=g&ot=s&p=060", "id": 7},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E9%99%B8%E5%89%8D%E9%AB%98%E7%94%B0%E5%B8%82&sc=area&pg=1&id=0296160192700&map=g&ot=s&p=030", "id": 8},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E7%B1%B3%E6%B2%A2%E5%B8%82&sc=area&pg=1&id=0292120106700&map=g&ot=s&p=060", "id": 9},
        {"url": "https://www.nttdocomo.co.jp/support/shop/search/shop.html?t=s&c%5B%5D=%E6%A8%AA%E6%89%8B%E5%B8%82&sc=area&pg=1&id=0295150152900&map=g&ot=s&p=050", "id": 10}
    ]
}
# Hotel-Livemax
# Extraction settings for Hotel Livemax "access" pages (the consumer is not
# shown here).
#   "target"       - the pages this configuration applies to, each with an id.
#   "coordination" - a regex with two capture groups plus the lat/lon templates
#                    that refer to those groups.
#   "format"       - one entry per extracted item (name, zip, address, tel);
#                    each pairs an XPath with an optional regex.
# Review fixes in this block:
#   * "lat"/"lon" were swapped ("lon": "${1}", "lat": "${2}"). The regex
#     captures a Google Maps style "ll=<lat>,<lon>" pair, and the other
#     configurations in this file that use an "ll=" regex map group 1 to lat
#     and group 2 to lon.
#     NOTE(review): confirm against a live page that the first number really
#     is the latitude before relying on this.
#   * "latlon_xpath" was single-quoted; it is now double-quoted so the literal
#     is also valid JSON (the value is still the empty string).
input_data = {
    "target": [
        {"id": 1, "url": "https://www.hotel-livemax.com/osaka/umedadoyama/access"},
        {"id": 2, "url": "https://www.hotel-livemax.com/kagoshima/kagoshima/access"},
        {"id": 3, "url": "https://www.hotel-livemax.com/tokyo/shiomist/access"},
        {"id": 4, "url": "https://www.hotel-livemax.com/tokyo/bakurocho/access"},
        {"id": 5, "url": "https://www.hotel-livemax.com/tokyo/kitafuchu/access"},
        {"id": 6, "url": "https://www.hotel-livemax.com/aichi/nagoya/access"},
        {"id": 7, "url": "https://www.hotel-livemax.com/chiba/mihama/access"},
        {"id": 8, "url": "https://www.hotel-livemax.com/osaka/esaka/access"},
        {"id": 9, "url": "https://www.hotel-livemax.com/kanagawa/sagamiharast/access"},
        {"id": 10, "url": "https://www.hotel-livemax.com/ishikawa/kanazawast/access"}
    ],
    "coordination": [
        {
            "tinymapurl_regex": "",
            "lon": "${2}",
            "srid": "",
            "lat": "${1}",
            "latlon_xpath": "",
            "regex_match": "ll=([\\-\\d\\.]+),([\\-\\d\\.]+).+",
            "mapurl_xpath": "",
            "mapurl_regex": ""
        }
    ],
    "format": [
        {
            "remove_tag": "1",
            "regex_match": "Null",
            "xpath": "//*[@id=\"page_navi\"]/ul/li[3]/a",
            "item_name": "name",
            "regex_item": "Null"
        },
        {
            "remove_tag": "1",
            "regex_match": "\u3012([\\d\\-\\\u2212\\\u2010]+)",
            "xpath": "//*[@id=\"top_info\"]/dl/dd[1]/text()[1]",
            "item_name": "zip",
            "regex_item": "${1}"
        },
        {
            "remove_tag": "1",
            "regex_match": "\u3012[\\d\\-\\\u2212\\\u2010]+\\s*([^<]+)",
            "xpath": "//*[@id=\"top_info\"]/dl/dd[1]/text()[1]",
            "item_name": "address",
            "regex_item": "${1}"
        },
        {
            "remove_tag": "1",
            "regex_match": "TEL\\s*[:\\.\\/]*\\s*([\\d\\-\\)\\(]+)",
            "xpath": "//*[@id=\"top_info\"]/dl/dd[1]",
            "item_name": "tel",
            "regex_item": "${1}"
        }
    ]
}
# Sun Route
# Extraction settings for Sun Route hotel pages (the consumer is not shown here).
#   "coordination" - a regex with two capture groups plus the lat/lon templates
#                    that refer to those groups.
#   "target"       - the pages this configuration applies to, each with an id.
#   "format"       - one entry per extracted item (name, zip, address, tel);
#                    each pairs an XPath with an optional regex.
# NOTE(review): the "tel" XPath "//pao2" does not look like a real element
# name and may be a typo -- verify against a live page.
input_data = {
    "coordination": [
        {
            "regex_match": "https:\\/\\/maps\\.google\\.com\\/maps\\?.*ll=([\\-\\d\\.]+),([\\-\\d\\.]+)",
            "mapurl_regex": "",
            "lat": "${1}",
            "lon": "${2}",
            "latlon_xpath": "",
            "mapurl_xpath": "",
            "srid": "",
            "tinymapurl_regex": ""
        }
    ],
    "target": [
        {"id": 1, "url": "https://www.sunroute.jp/HotelInfo/tohoku/patiogoshogawara/index.html"},
        {"id": 2, "url": "https://www.sunroute.jp/HotelInfo/chugoku/tokuyama/index.html"},
        {"id": 3, "url": "https://www.sunroute.jp/HotelInfo/kinki/osakanamba/index.html"},
        {"id": 4, "url": "https://www.sunroute.jp/HotelInfo/tohoku/fukushima/index.html"},
        {"id": 5, "url": "https://www.sunroute.jp/HotelInfo/tokyo_kanagawa/takadanobaba/index.html"},
        {"id": 6, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/nagano/index.html"},
        {"id": 7, "url": "https://www.sunroute.jp/HotelInfo/tokyo_kanagawa/ginza/index.html"},
        {"id": 8, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/skyhoteluozu/index.html"},
        {"id": 9, "url": "https://www.sunroute.jp/HotelInfo/koshinetsu_hokuriku/ueda/index.html"},
        {"id": 10, "url": "https://www.sunroute.jp/HotelInfo/kanto/gardenpalace/index.html"}
    ],
    "format": [
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "item_name": "name",
            "xpath": "//*[@id=\"slides\"]/div[2]/ol/li[4]/text()"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)",
            "remove_tag": "1",
            "item_name": "zip",
            "xpath": "//address"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$",
            "remove_tag": "1",
            "item_name": "address",
            "xpath": "//address"
        },
        {
            "regex_item": "${1}",
            "regex_match": "TEL\\s*[:\\.\\/]*\\s*([\\d\\-\\)\\(]+)",
            "remove_tag": "1",
            "item_name": "tel",
            "xpath": "//pao2"
        }
    ]
}
# Super Hotel
# Extraction settings for Super Hotel pages (the consumer is not shown here).
#   "target"       - the pages this configuration applies to, each with an id.
#   "format"       - one entry per extracted item (name, zip, address, tel);
#                    each pairs an XPath with an optional regex.
#   "coordination" - a regex capturing the two arguments of a
#                    navitime.geo.LatLng(...) call, plus the lat/lon templates
#                    that refer to those groups.
# NOTE(review): "latlon_xpath" addresses the 13th <script> under <body> by
# position, which will break if the page adds or removes a script tag.
input_data = {
    "target": [
        {"url": "http://www.superhotel.co.jp/s_hotels/tottorikita/tottorikita.html", "id": 1},
        {"url": "http://www.superhotel.co.jp/s_hotels/hirose/hirose.html", "id": 2},
        {"url": "http://www.superhotel.co.jp/s_hotels/ueno/ueno.html", "id": 3},
        {"url": "http://www.superhotel.co.jp/s_hotels/fujinomiya/", "id": 4},
        {"url": "http://www.superhotel.co.jp/s_hotels/omiya/omiya.html", "id": 5},
        {"url": "http://www.superhotel.co.jp/s_hotels/kitami/kitami.html", "id": 6},
        {"url": "http://www.superhotel.co.jp/s_hotels/sendai/sendai.html", "id": 7},
        {"url": "http://www.superhotel.co.jp/s_hotels/kushiro/kushiro.html", "id": 8},
        {"url": "http://www.superhotel.co.jp/s_hotels/akihabara/", "id": 9},
        {"url": "http://www.superhotel.co.jp/s_hotels/hachinohe/hachinohe.html", "id": 10}
    ],
    "format": [
        {
            "regex_match": "Null",
            "xpath": "//p[@class=\"hotel_name__ja\"]",
            "item_name": "name",
            "remove_tag": "1",
            "regex_item": "Null"
        },
        {
            "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)",
            "xpath": "//*[@id=\"v_address\"]",
            "item_name": "zip",
            "remove_tag": "1",
            "regex_item": "${1}"
        },
        {
            "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)\\s*TEL",
            "xpath": "//*[@id=\"v_address\"]",
            "item_name": "address",
            "remove_tag": "1",
            "regex_item": "${1}"
        },
        {
            "regex_match": "TEL[\uff1a:]([\\d\\-]+)",
            "xpath": "//p[@class=\"tel\"]",
            "item_name": "tel",
            "remove_tag": "1",
            "regex_item": "${1}"
        }
    ],
    "coordination": [
        {
            "regex_match": "navitime\\.geo\\.LatLng\\(([\\d\\-\\.]+),([\\d\\-\\.]+)\\)",
            "tinymapurl_regex": "",
            "mapurl_regex": "",
            "latlon_xpath": "/html/body/script[13]/text()",
            "srid": "",
            "mapurl_xpath": "",
            "lat": "${1}",
            "lon": "${2}"
        }
    ]
}
# Route Inn
# Extraction settings for Route Inn hotel pages (the consumer is not shown here).
#   "target"       - the pages this configuration applies to, each with an id.
#   "format"       - entries pairing an XPath with an optional regex for the
#                    items name, zip, address and tel.
#   "coordination" - a regex capturing the two arguments of a
#                    navitime.geo.LatLng(...) call, plus the lat/lon templates
#                    that refer to those groups.
# NOTE(review): "format" defines each of name/zip/address/tel twice with
# different XPaths (entries 1-4 and 5-8). This may be an intentional fallback
# or the residue of two merged configurations -- confirm how the consumer
# handles duplicate "item_name" values.
input_data = {
    "target": [
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_584", "id": 1},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_210", "id": 2},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_565", "id": 3},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_527", "id": 4},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_66", "id": 5},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_84", "id": 6},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_571", "id": 7},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_533", "id": 8},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_560", "id": 9},
        {"url": "https://www.route-inn.co.jp/search/hotel/parking_hotel_id_649", "id": 10}
    ],
    "format": [
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "xpath": "//*[@id=\"crumbs\"]/li[3]/a",
            "item_name": "name"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]/p[1]",
            "item_name": "zip"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]/p[1]",
            "item_name": "address"
        },
        {
            "regex_item": "Null",
            "regex_match": "Null",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]/p[1]/span/strong",
            "item_name": "tel"
        },
        {
            "regex_item": "${1}",
            "regex_match": "^(?:\u3010\u516c\u5f0f\u3011)*(.+)\\s*[\\\uff5c\\|]",
            "remove_tag": "1",
            "xpath": "/html/head/title",
            "item_name": "name"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*([\\d\\-\\\u2212\\\u2010]+)",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]",
            "item_name": "zip"
        },
        {
            "regex_item": "${1}",
            "regex_match": "\u3012\\s*[\\d\\-\\\u2212\\\u2010]+\\s*(.+)$",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]",
            "item_name": "address"
        },
        {
            "regex_item": "${1}",
            "regex_match": "TEL:\\s*([\\d\\-\\)\\(]+)",
            "remove_tag": "1",
            "xpath": "//*[@id=\"page_topbox\"]",
            "item_name": "tel"
        }
    ],
    "coordination": [
        {
            "srid": "",
            "latlon_xpath": "/html/body",
            "mapurl_xpath": "",
            "lat": "${1}",
            "lon": "${2}",
            "regex_match": "navitime\\.geo\\.LatLng\\(([\\d\\-\\.]+),([\\d\\-\\.]+)\\)",
            "mapurl_regex": "",
            "tinymapurl_regex": ""
        }
    ]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment