|
|
@@ -1,17 +1,9 @@
|
|
|
package parser;
|
|
|
|
|
|
-import java.awt.image.BufferedImage;
|
|
|
-import java.io.File;
|
|
|
-import java.io.IOException;
|
|
|
-import java.net.MalformedURLException;
|
|
|
-import java.net.URL;
|
|
|
-import java.time.Duration;
|
|
|
-import java.util.Arrays;
|
|
|
-import java.util.Collections;
|
|
|
-import java.util.List;
|
|
|
-
|
|
|
-import javax.imageio.ImageIO;
|
|
|
-
|
|
|
+import com.google.common.base.Strings;
|
|
|
+import database.Database;
|
|
|
+import io.github.bonigarcia.wdm.WebDriverManager;
|
|
|
+import obejcts.Recepie;
|
|
|
import org.imgscalr.Scalr;
|
|
|
import org.openqa.selenium.By;
|
|
|
import org.openqa.selenium.JavascriptExecutor;
|
|
|
@@ -21,14 +13,20 @@ import org.openqa.selenium.chrome.ChromeOptions;
|
|
|
import org.openqa.selenium.support.ui.ExpectedConditions;
|
|
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
|
|
|
|
|
-import com.google.common.base.Strings;
|
|
|
-
|
|
|
-import database.Database;
|
|
|
-import obejcts.Recepie;
|
|
|
+import javax.imageio.ImageIO;
|
|
|
+import java.awt.image.BufferedImage;
|
|
|
+import java.io.File;
|
|
|
+import java.io.IOException;
|
|
|
+import java.net.MalformedURLException;
|
|
|
+import java.net.URL;
|
|
|
+import java.time.Duration;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.Collections;
|
|
|
+import java.util.List;
|
|
|
|
|
|
public class ParserBase implements Parser {
|
|
|
|
|
|
- List<String> measurements = Arrays.asList("msk", "tsk", "g", "kg", "ml", "dl", "l", "st", "krm", "förp", "kruka", "färsk",
|
|
|
+ List<String> measurements = Arrays.asList("msk", "tsk", "g", "kg", "ml", "dl", "l", "st", "krm", "f�rp", "kruka", "f�rsk",
|
|
|
"burk", "knippe", "kvist", "cm", "burkar", "cl", "port");
|
|
|
protected Database database = new Database();
|
|
|
protected ChromeDriver driver;
|
|
|
@@ -47,12 +45,12 @@ public class ParserBase implements Parser {
|
|
|
}
|
|
|
url += searchWords;
|
|
|
|
|
|
- driver = getSeleniumDriver();
|
|
|
+ // driver = getSeleniumDriver();
|
|
|
+ driver = getDriver();
|
|
|
wait = getWaitDriver(driver);
|
|
|
jsExecutor = getJsExecutor(driver);
|
|
|
|
|
|
driver.get(url);
|
|
|
- wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath(recepiesItemListXpath), 0));
|
|
|
|
|
|
Thread.sleep(500);
|
|
|
if (checkIfElementExists(driver, cookieConsentButtonXpath)) {
|
|
|
@@ -60,6 +58,8 @@ public class ParserBase implements Parser {
|
|
|
Thread.sleep(100);
|
|
|
}
|
|
|
|
|
|
+ wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath(recepiesItemListXpath), 0));
|
|
|
+
|
|
|
List<WebElement> recepies = driver.findElements(By.xpath(recepiesItemListXpath));
|
|
|
|
|
|
for (int i = 0; i < recepies.size(); i++) {
|
|
|
@@ -140,11 +140,11 @@ public class ParserBase implements Parser {
|
|
|
}
|
|
|
|
|
|
protected String getAmountFromText(String text) {
|
|
|
- text = text.replaceAll("[^0-9\\/\\.,-½]", "").trim();
|
|
|
+ text = text.replaceAll("[^0-9\\/\\.,-�]", "").trim();
|
|
|
if (text.endsWith(",")) {
|
|
|
text.substring(0, text.length() - 1);
|
|
|
}
|
|
|
- return text.replaceAll("[^0-9\\/\\.,-½]", "").trim();
|
|
|
+ return text.replaceAll("[^0-9\\/\\.,-�]", "").trim();
|
|
|
}
|
|
|
|
|
|
protected JavascriptExecutor getJsExecutor(ChromeDriver driver) {
|
|
|
@@ -163,6 +163,35 @@ public class ParserBase implements Parser {
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
+ public ChromeDriver getDriver() {
|
|
|
+ WebDriverManager.chromedriver().setup();
|
|
|
+ ChromeDriver driver = new ChromeDriver(getChromeOptions());
|
|
|
+ return driver;
|
|
|
+ }
|
|
|
+
|
|
|
+ private ChromeOptions getChromeOptions() {
|
|
|
+ ChromeOptions options = new ChromeOptions();
|
|
|
+ // Fixing 255 Error crashes
|
|
|
+ options.addArguments("--no-sandbox");
|
|
|
+ options.addArguments("--disable-dev-shm-usage");
|
|
|
+
|
|
|
+ // Options to trick bot detection
|
|
|
+ // Removing webdriver property
|
|
|
+ options.addArguments("--disable-blink-features=AutomationControlled");
|
|
|
+ options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
|
|
|
+ options.setExperimentalOption("useAutomationExtension", null);
|
|
|
+
|
|
|
+ // Changing the user agent / browser fingerprint
|
|
|
+ options.addArguments("window-size=1920,1080");
|
|
|
+ options.addArguments(
|
|
|
+ "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36");
|
|
|
+
|
|
|
+ // Other
|
|
|
+ options.addArguments("disable-infobars");
|
|
|
+
|
|
|
+ return options;
|
|
|
+ }
|
|
|
+
|
|
|
protected ChromeDriver getSeleniumDriver() {
|
|
|
ChromeOptions options = new ChromeOptions();
|
|
|
|