Skip to content

Instantly share code, notes, and snippets.

@btbytes
Last active June 20, 2024 16:43
Show Gist options
  • Save btbytes/d2b73fd5cf3139e842d9db57140729f5 to your computer and use it in GitHub Desktop.
Save btbytes/d2b73fd5cf3139e842d9db57140729f5 to your computer and use it in GitHub Desktop.
webscraper python to java using Claude Sonnet 3.5

Prompt:

convert this to Java; use jbang to include DEPS --

import requests
from bs4 import BeautifulSoup

url = 'https://example.com'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Extract data from the webpage
data = soup.find('div', class_='content')
print(data.text)
//DEPS org.jsoup:jsoup:1.15.3
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Fetches a web page with jsoup and returns the text of the first element
 * matching a CSS selector.
 *
 * <p>Network failures are reported through the returned string (prefixed with
 * {@code "Error: "}) rather than by throwing; only invalid arguments throw.
 */
public class WebScraper {
    /** Selector used by {@link #scrape(String)}. */
    private static final String DEFAULT_SELECTOR = "div.content";

    /** URL scraped by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_URL = "https://example.com";

    /**
     * Scrapes the text of the first {@code div.content} element at {@code url}.
     *
     * @param url address of the page to fetch; must be a well-formed URL
     * @return the element's text, a "no content" message when nothing matches,
     *         or {@code "Error: <message>"} when the fetch fails with an I/O error
     * @throws IllegalArgumentException if {@code url} is null, empty, or malformed
     */
    public String scrape(String url) {
        return scrape(url, DEFAULT_SELECTOR);
    }

    /**
     * Scrapes the text of the first element matching {@code cssSelector} at {@code url}.
     *
     * @param url         address of the page to fetch; must be a well-formed URL
     * @param cssSelector selector identifying the element to extract
     * @return the element's text, a "no content" message when nothing matches,
     *         or {@code "Error: <message>"} when the fetch fails with an I/O error
     * @throws IllegalArgumentException if {@code url} is null, empty, or malformed,
     *                                  or if {@code cssSelector} is null or empty
     */
    public String scrape(String url, String cssSelector) {
        if (url == null || url.isEmpty()) {
            throw new IllegalArgumentException("URL cannot be null or empty");
        }
        try {
            new URL(url); // Constructed only to validate the URL format.
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException("Invalid URL format", e);
        }
        if (cssSelector == null || cssSelector.isEmpty()) {
            throw new IllegalArgumentException("CSS selector cannot be null or empty");
        }

        try {
            // Send the GET request and parse the returned HTML.
            Document doc = Jsoup.connect(url).get();
            Element data = doc.selectFirst(cssSelector);
            if (data == null) {
                return "No content found with the specified class.";
            }
            return data.text();
        } catch (IOException e) {
            // Deliberately reported as a value so callers always get a printable result.
            return "Error: " + e.getMessage();
        }
    }

    /**
     * Prints the scraped content of the URL given as the first argument,
     * falling back to {@code https://example.com} when none is given.
     *
     * @param args optional single argument: the URL to scrape
     */
    public static void main(String[] args) {
        String url = args.length > 0 ? args[0] : DEFAULT_URL;
        WebScraper scraper = new WebScraper();
        System.out.println(scraper.scrape(url));
    }
}
//DEPS org.jsoup:jsoup:1.15.3
//DEPS org.junit.jupiter:junit-jupiter-api:5.8.2
//DEPS org.junit.jupiter:junit-jupiter-engine:5.8.2
//DEPS org.mockito:mockito-core:4.5.1
//DEPS org.mockito:mockito-inline:4.5.1
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.MockedStatic;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;
class WebScraperTest {
private WebScraper webScraper;
@BeforeEach
void setUp() {
webScraper = new WebScraper();
}
@Test
void testValidUrlWithContent() throws IOException {
String html = "<html><body><div class='content'>Test Content</div></body></html>";
Document mockDocument = Jsoup.parse(html);
try (MockedStatic<Jsoup> mockedJsoup = mockStatic(Jsoup.class)) {
mockedJsoup.when(() -> Jsoup.connect(anyString()).get()).thenReturn(mockDocument);
String result = webScraper.scrape("https://example.com");
assertEquals("Test Content", result);
}
}
@Test
void testValidUrlWithoutContentDiv() throws IOException {
String html = "<html><body><div>No Content Div</div></body></html>";
Document mockDocument = Jsoup.parse(html);
try (MockedStatic<Jsoup> mockedJsoup = mockStatic(Jsoup.class)) {
mockedJsoup.when(() -> Jsoup.connect(anyString()).get()).thenReturn(mockDocument);
String result = webScraper.scrape("https://example.com");
assertEquals("No content found with the specified class.", result);
}
}
@Test
void testEmptyUrl() {
assertThrows(IllegalArgumentException.class, () -> webScraper.scrape(""));
}
@Test
void testNullUrl() {
assertThrows(IllegalArgumentException.class, () -> webScraper.scrape(null));
}
@Test
void testInvalidUrl() {
assertThrows(IllegalArgumentException.class, () -> webScraper.scrape("not a url"));
}
@Test
void testConnectionTimeout() throws IOException {
try (MockedStatic<Jsoup> mockedJsoup = mockStatic(Jsoup.class)) {
mockedJsoup.when(() -> Jsoup.connect(anyString()).get()).thenThrow(new IOException("Connection timeout"));
String result = webScraper.scrape("https://example.com");
assertEquals("Error: Connection timeout", result);
}
}
@Test
void testHttpError() throws IOException {
try (MockedStatic<Jsoup> mockedJsoup = mockStatic(Jsoup.class)) {
mockedJsoup.when(() -> Jsoup.connect(anyString()).get()).thenThrow(new IOException("HTTP error 404"));
String result = webScraper.scrape("https://example.com");
assertEquals("Error: HTTP error 404", result);
}
}
@Test
void testEmptyDocument() throws IOException {
Document mockDocument = Jsoup.parse("");
try (MockedStatic<Jsoup> mockedJsoup = mockStatic(Jsoup.class)) {
mockedJsoup.when(() -> Jsoup.connect(anyString()).get()).thenReturn(mockDocument);
String result = webScraper.scrape("https://example.com");
assertEquals("No content found with the specified class.", result);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment