// Koket.java (4.7 KB)
  1. package parser;
  2. import java.awt.image.BufferedImage;
  3. import java.io.File;
  4. import java.io.IOException;
  5. import java.net.MalformedURLException;
  6. import java.net.URL;
  7. import java.util.List;
  8. import java.util.regex.Matcher;
  9. import java.util.regex.Pattern;
  10. import javax.imageio.ImageIO;
  11. import org.imgscalr.Scalr;
  12. import org.openqa.selenium.By;
  13. import org.openqa.selenium.NoSuchElementException;
  14. import org.openqa.selenium.WebElement;
  15. import org.openqa.selenium.support.ui.ExpectedConditions;
  16. import obejcts.Ingredient;
  17. import obejcts.Recepie;
  18. public class Koket extends ParserBase {
  19. public static final String RECEPIE_TITLE_XPATH = ".//h2[contains(@class,'list_item_title')]";
  20. public static final String BASE_URL = "https://www.koket.se/";
  21. public static final String PATH_SEPARATOR = "%20";
  22. public static final String RECEPIELIST_ITEMS_XPATH = "//div[contains(@class,'list_item_wrapper')]";
  23. public static final String URL_SEARCH_PATTERN = "search?searchtext=";
  24. public static final String COOKIE_CONSENT_BUTTON_XPATH = "//button[@id='onetrust-accept-btn-handler']";
  25. public static final String IMAGE_XPATH = "//img[contains(@class,'image__')]";
  26. public static final String SHOW_MORE_XPATH = "//div[contains(@class,'show-more-button')]";
  27. public static boolean isFromHereRecepie(String url) {
  28. return url.contains(BASE_URL);
  29. }
  30. @Override protected String getAmountFromText(String text) {
  31. Pattern pattern = Pattern.compile("^[0-9, \\/-]*");
  32. Matcher matcher = pattern.matcher(text);
  33. if (matcher.find()) {
  34. return matcher.group();
  35. }
  36. return "";
  37. }
  38. @Override void handleRecepie(WebElement recepie, String recepieTitle, Recepie recepieFromDb) {
  39. if (checkIfElementExists(recepie, ".//div[contains(@class,'rating_wrapper')]")) {
  40. super.handleRecepie(recepie, recepieTitle, recepieFromDb);
  41. }
  42. }
  43. @Override protected void parseRecepie(WebElement recepie, String recepieTitle) {
  44. try {
  45. Thread.sleep(1000);
  46. recepie.click();
  47. Thread.sleep(200);
  48. wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath("//h1[contains(@class, 'recipe_title')]"), 0));
  49. String title = driver.findElement(By.xpath("//h1[contains(@class, 'recipe_title')]")).getText();
  50. String time = "";
  51. if (checkIfElementExists(driver, "//div[contains(@class, 'details_wrapper')]//p[2]//span")) {
  52. String timeText = driver.findElement(By.xpath("//div[contains(@class, 'details_wrapper')]//p[2]//span"))
  53. .getText();
  54. time = timeText.replaceAll("[^+0-9]", "");
  55. }
  56. Recepie newRecepie = new Recepie();
  57. newRecepie.setTime(time);
  58. newRecepie.setUrl(driver.getCurrentUrl());
  59. newRecepie.setName(recepieTitle);
  60. WebElement desc = driver.findElement(By.xpath("//div[contains(@class, 'description_description')]"));
  61. scrollElementIntoView(driver, desc);
  62. if (checkIfElementExists(desc, ".//p")) {
  63. newRecepie.setDescription(desc.findElement(By.xpath(".//p")).getText());
  64. }
  65. newRecepie.setId(database.insertRecepie(newRecepie));
  66. List<WebElement> ingredients = driver.findElements(By.xpath("//span[@class='ingredient']//span"));
  67. String measurement = "";
  68. String amount = "";
  69. for (WebElement ingredient : ingredients) {
  70. String name = fixName(ingredient.getText());
  71. measurement = getMeasurement(name);
  72. amount = getAmountFromText(name);
  73. name = name.replace(measurement, "");
  74. name = name.replace(amount, "");
  75. name = name.trim();
  76. database.addIngredientToRecepie(newRecepie.getId(), new Ingredient(fixName(name), amount, measurement));
  77. }
  78. List<WebElement> steps = driver
  79. .findElements(By.xpath("//ol[contains(@class, 'instruction_section_numberedList')]//span"));
  80. int stepCounter = 1;
  81. for (WebElement step : steps) {
  82. database.addRecepieStep(newRecepie.getId(), stepCounter++, step.getText());
  83. }
  84. saveImage(newRecepie);
  85. driver.navigate().back();
  86. } catch (InterruptedException e) {
  87. e.printStackTrace();
  88. }
  89. }
  90. void saveImage(Recepie newRecepie) {
  91. try {
  92. WebElement imageElement = driver.findElement(By.xpath(IMAGE_XPATH));
  93. String imgSrc = imageElement.getAttribute("currentSrc");
  94. if (imgSrc.contains(".webp")) {
  95. imgSrc = imgSrc.replace(".webp", "");
  96. }
  97. URL imageUrl = new URL(imgSrc);
  98. BufferedImage savedImage = ImageIO.read(imageUrl);
  99. BufferedImage resizedImage = Scalr.resize(savedImage, 300);
  100. File file = new File("recepie-image.png");
  101. ImageIO.write(resizedImage, "png", file);
  102. database.saveRecepieImage(newRecepie.getId(), file);
  103. } catch (MalformedURLException e) {
  104. e.printStackTrace();
  105. } catch (IOException e) {
  106. e.printStackTrace();
  107. } catch (NoSuchElementException ne) {
  108. File file = new File("empty.png");
  109. try {
  110. file.createNewFile();
  111. } catch (IOException e) {
  112. e.printStackTrace();
  113. }
  114. database.saveRecepieImage(newRecepie.getId(), file);
  115. }
  116. }
  117. }