Skip to content

Instantly share code, notes, and snippets.

@coreyhermanson
Created April 4, 2017 17:47
Show Gist options
  • Save coreyhermanson/880c1a2a6a6fa95c3c3b25c294971e42 to your computer and use it in GitHub Desktop.
Save coreyhermanson/880c1a2a6a6fa95c3c3b25c294971e42 to your computer and use it in GitHub Desktop.
BrightPlanet Harvest API: Deep Web Project Examples

BrightPlanet Harvest API: Deep Web harvest examples

One-Time and Scheduled harvest examples

  1. Unscheduled: the Deep Web harvest executes immediately (no delay parameter) and runs once (scheduleType="ONCE" and no interval parameter).
 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_APItest_once",
  "deepHarvestParameters": {
    "deepQueries": [
      "\"theresa may\" AND brexit",
      "p2p OR \"peer to peer\" OR \"sharing economy\""
    ],
    "categoryIds": [
      2895
    ]
  },
  "filterQuery": "brexit OR \"theresa may\" OR p2p OR \"peer to peer\" OR \"sharing economy\" OR (economy AND sharing)",
  "tags": [
    "source_News", "topic_Finance"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}
  2. Scheduled: the Deep Web harvest executes in one hour (delay, in milliseconds) and is scheduled to recur (scheduleType="RECURRING") every 48 hours (interval, in milliseconds).
 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "RECURRING",
  "name": "DW_APItest_ongoing",
  "delay": 3600000,
  "interval": 172800000,
  "deepHarvestParameters": {
    "deepQueries": [
      "\"theresa may\" AND brexit",
      "p2p OR \"peer to peer\" OR \"sharing economy\""
    ],
    "categoryIds": [
      2895
    ]
  },
  "filterQuery": "brexit OR \"theresa may\" OR p2p OR \"peer to peer\" OR \"sharing economy\" OR (economy AND sharing)",
  "tags": [
    "source_News", "topic_Finance"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

Example harvests for each source category in a full harvest project

Note: All examples are set to execute immediately. If you are creating multiple harvests that use the same categories, it is highly recommended to space your harvest events out (using the 'delay' parameter).

US News

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_USNews_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2851
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_News", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

UK News

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_UKNews_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2844, 2845, 2846
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_News", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

World News

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_WorldNews_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2895
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_News", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

Academic

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_Academic_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2847, 2938
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_Academic", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

Government

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_Government_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2841
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_Government", "topic_Energy"
  ],
  "maxDocCount": 1000,
  "maxDocSize": -1
}

Finance

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_Finance_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2840
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_Investments", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

Industry

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_Industry_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2839, 2872, 2875, 2842, 2868
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_Industry", "topic_Energy"
  ],
  "maxDocCount": 500,
  "maxDocSize": -1
}

Patents

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_Patents_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende"
    ],
    "categoryIds": [
      2854, 2876
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_Patents", "topic_Energy"
  ],
  "maxDocCount": 5000,
  "maxDocSize": -1
}

Patent Applications

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_PatentApplications_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende"
    ],
    "categoryIds": [
      2852
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_PatentApplications", "topic_Energy"
  ],
  "maxDocCount": 5000,
  "maxDocSize": -1
}

General

 {
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE",
  "name": "DW_General_EnergieWende",
  "deepHarvestParameters": {
    "deepQueries": [
        "energiewende",
        "energiewende AND nuclear AND \"phase out\"",
        "energiewende AND \"Reduced emission\""
    ],
    "categoryIds": [
      2896, 2670, 2901
    ]
  },
  "filterQuery": "energiewende OR \"phase out\" OR \"reduced emission\" OR nuclear",
  "tags": [
    "source_General", "topic_Energy"
  ],
  "maxDocCount": 100,
  "maxDocSize": -1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment