| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- package parser;
- import java.awt.image.BufferedImage;
- import java.io.File;
- import java.io.IOException;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import javax.imageio.ImageIO;
- import org.imgscalr.Scalr;
- import org.openqa.selenium.By;
- import org.openqa.selenium.NoSuchElementException;
- import org.openqa.selenium.WebElement;
- import org.openqa.selenium.support.ui.ExpectedConditions;
- import obejcts.Ingredient;
- import obejcts.Recepie;
- public class Koket extends ParserBase {
- public static final String RECEPIE_TITLE_XPATH = ".//h2[contains(@class,'list_item_title')]";
- public static final String BASE_URL = "https://www.koket.se/";
- public static final String PATH_SEPARATOR = "%20";
- public static final String RECEPIELIST_ITEMS_XPATH = "//div[contains(@class,'list_item_wrapper')]";
- public static final String URL_SEARCH_PATTERN = "search?searchtext=";
- public static final String COOKIE_CONSENT_BUTTON_XPATH = "//button[@id='onetrust-accept-btn-handler']";
- public static final String IMAGE_XPATH = "//img[contains(@class,'image__')]";
- public static final String SHOW_MORE_XPATH = "//div[contains(@class,'show-more-button')]";
- public static boolean isFromHereRecepie(String url) {
- return url.contains(BASE_URL);
- }
- @Override protected String getAmountFromText(String text) {
- Pattern pattern = Pattern.compile("^[0-9, \\/-]*");
- Matcher matcher = pattern.matcher(text);
- if (matcher.find()) {
- return matcher.group();
- }
- return "";
- }
- @Override void handleRecepie(WebElement recepie, String recepieTitle, Recepie recepieFromDb) {
- if (checkIfElementExists(recepie, ".//div[contains(@class,'rating_wrapper')]")) {
- super.handleRecepie(recepie, recepieTitle, recepieFromDb);
- }
- }
- @Override protected void parseRecepie(WebElement recepie, String recepieTitle) {
- try {
- Thread.sleep(1000);
- recepie.click();
- Thread.sleep(200);
- wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath("//h1[contains(@class, 'recipe_title')]"), 0));
- String title = driver.findElement(By.xpath("//h1[contains(@class, 'recipe_title')]")).getText();
- String time = "";
- if (checkIfElementExists(driver, "//div[contains(@class, 'details_wrapper')]//p[2]//span")) {
- String timeText = driver.findElement(By.xpath("//div[contains(@class, 'details_wrapper')]//p[2]//span"))
- .getText();
- time = timeText.replaceAll("[^+0-9]", "");
- }
- Recepie newRecepie = new Recepie();
- newRecepie.setTime(time);
- newRecepie.setUrl(driver.getCurrentUrl());
- newRecepie.setName(recepieTitle);
- WebElement desc = driver.findElement(By.xpath("//div[contains(@class, 'description_description')]"));
- scrollElementIntoView(driver, desc);
- if (checkIfElementExists(desc, ".//p")) {
- newRecepie.setDescription(desc.findElement(By.xpath(".//p")).getText());
- }
- newRecepie.setId(database.insertRecepie(newRecepie));
- List<WebElement> ingredients = driver.findElements(By.xpath("//span[@class='ingredient']//span"));
- String measurement = "";
- String amount = "";
- for (WebElement ingredient : ingredients) {
- String name = fixName(ingredient.getText());
- measurement = getMeasurement(name);
- amount = getAmountFromText(name);
- name = name.replace(measurement, "");
- name = name.replace(amount, "");
- name = name.trim();
- database.addIngredientToRecepie(newRecepie.getId(), new Ingredient(fixName(name), amount, measurement));
- }
- List<WebElement> steps = driver
- .findElements(By.xpath("//ol[contains(@class, 'instruction_section_numberedList')]//span"));
- int stepCounter = 1;
- for (WebElement step : steps) {
- database.addRecepieStep(newRecepie.getId(), stepCounter++, step.getText());
- }
- saveImage(newRecepie);
- driver.navigate().back();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- void saveImage(Recepie newRecepie) {
- try {
- WebElement imageElement = driver.findElement(By.xpath(IMAGE_XPATH));
- String imgSrc = imageElement.getAttribute("currentSrc");
- if (imgSrc.contains(".webp")) {
- imgSrc = imgSrc.replace(".webp", "");
- }
- URL imageUrl = new URL(imgSrc);
- BufferedImage savedImage = ImageIO.read(imageUrl);
- BufferedImage resizedImage = Scalr.resize(savedImage, 300);
- File file = new File("recepie-image.png");
- ImageIO.write(resizedImage, "png", file);
- database.saveRecepieImage(newRecepie.getId(), file);
- } catch (MalformedURLException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } catch (NoSuchElementException ne) {
- File file = new File("empty.png");
- try {
- file.createNewFile();
- } catch (IOException e) {
- e.printStackTrace();
- }
- database.saveRecepieImage(newRecepie.getId(), file);
- }
- }
- }
|