OddsPortal.java 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. package parser;
  2. import com.google.common.base.Stopwatch;
  3. import com.google.common.base.Strings;
  4. import mysql.Mysql;
  5. import object.ResultDTO;
  6. import org.openqa.selenium.By;
  7. import org.openqa.selenium.StaleElementReferenceException;
  8. import org.openqa.selenium.WebElement;
  9. import org.openqa.selenium.chrome.ChromeOptions;
  10. import org.openqa.selenium.remote.RemoteWebDriver;
  11. import org.openqa.selenium.support.ui.ExpectedConditions;
  12. import org.openqa.selenium.support.ui.WebDriverWait;
  13. import java.net.MalformedURLException;
  14. import java.net.URL;
  15. import java.time.Duration;
  16. import java.time.LocalDate;
  17. import java.time.LocalDateTime;
  18. import java.time.format.DateTimeFormatter;
  19. import java.util.ArrayList;
  20. import java.util.Collections;
  21. import java.util.List;
  22. import java.util.concurrent.TimeUnit;
  23. import java.util.logging.Level;
  24. import java.util.logging.Logger;
  25. public class OddsPortal extends ParserBase implements ParserJoinedFunctions {
  26. private static final String DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV = "//div[contains(@class,'tabs')" +
  27. "]/div[not(@class)]/div[1]/div";
  28. private static final String SOCCER_RESULTS_TABLE_NAME = "SoccerResults";
  29. private static final String DATE_PATTERN = "yyyyMMdd";
  30. final int sportsId = Mysql.getInstance().getSportId("soccer");
  31. private LocalDateTime baseDate;
  32. private int sportId;
  33. private int countryId;
  34. private int leagueId;
  35. private int currentLeagueId;
  36. private int currentCountryId;
  37. private List<ResultDTO> resultsToInsert = new ArrayList<>();
  38. private String currentSeason;
  39. public void getMatches(LocalDateTime date) {
  40. baseDate = date;
  41. final String dateFormatted = date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
  42. System.out.println("Starting to get matches at date " + dateFormatted);
  43. Stopwatch stopwatch = Stopwatch.createStarted();
  44. getMatchesByDateSelenium(dateFormatted);
  45. System.out.println("Done with date " + dateFormatted + " took " + stopwatch.elapsed(TimeUnit.SECONDS) + " " +
  46. "seconds");
  47. stopwatch.stop();
  48. }
  49. private int getCountryId(List<WebElement> links) {
  50. String country;
  51. country = links.get(1).getText().trim();
  52. country = country.replace(" ", "-");
  53. country = country.replace("\\.", "");
  54. return Mysql.getInstance().getCountryId(country);
  55. }
  56. private String getLastSeason(int leagueId, int countryId) {
  57. String result = "";
  58. if (!Strings.isNullOrEmpty(currentSeason) && currentLeagueId == leagueId && currentCountryId == countryId) {
  59. return currentSeason;
  60. } else {
  61. result = Mysql.getInstance().getLastSeason(leagueId, countryId);
  62. }
  63. if (Strings.isNullOrEmpty(result)) {
  64. result = String.valueOf(LocalDate.now().getYear());
  65. }
  66. return result;
  67. }
  68. private int getLeagueId(List<WebElement> links) {
  69. String league;
  70. league = links.get(2).getText().trim();
  71. league = league.replace(" ", "-");
  72. league = league.replace("\\.", "");
  73. return Mysql.getInstance().getLeagueId(sportId, countryId, league);
  74. }
  75. private void getMatchesByDateSelenium(String date) {
  76. final String soccerUrl = "https://oddsportal.com/matches/football/" + date;
  77. try {
  78. parseSoccerMatchesSelenium(soccerUrl);
  79. } catch (MalformedURLException e) {
  80. e.printStackTrace();
  81. }
  82. }
  83. private void parseSoccerMatchesSelenium(final String soccerUrl) throws MalformedURLException {
  84. // ChromeDriver driver = getSeleniumDriver();
  85. // DesiredCapabilities capabilities = new DesiredCapabilities();
  86. // capabilities.setCapability("browserName", "Chrome");
  87. // capabilities.setPlatform(Platform.LINUX);
  88. ChromeOptions options = new ChromeOptions();
  89. // Fixing 255 Error crashes
  90. // options.addArguments("--no-sandbox");
  91. options.addArguments("--disable-dev-shm-usage");
  92. // Options to trick bot detection
  93. // Removing webdriver property
  94. options.addArguments("--disable-blink-features=AutomationControlled");
  95. options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
  96. options.setExperimentalOption("useAutomationExtension", null);
  97. // Changing the user agent / browser fingerprint
  98. options.addArguments("window-size=1920,1080");
  99. options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like " +
  100. "Gecko) " + "Chrome/74.0.3729.169 Safari/537.36");
  101. // Other
  102. options.addArguments("disable-infobars");
  103. System.out.println("Getting page");
  104. RemoteWebDriver driver = new RemoteWebDriver(new URL("http://nordh.xyz:14444/wd/hub"), options);
  105. sportId = Mysql.getInstance().getSportId("soccer");
  106. driver.get(soccerUrl);
  107. System.out.println("Waiting for elements");
  108. WebDriverWait wait = getWaitDriver(driver, 90);
  109. wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV), 1));
  110. System.out.println("Switching odds");
  111. if (!checkIfElementExists(driver, "//p[contains(@class, 'text-orange-main')]")) {
  112. WebElement oddsFormat = driver.findElement(By.xpath("//p[contains(@class, 'text-orange-main')]"));
  113. oddsFormat.click();
  114. driver.findElement(By.xpath("//a[span[contains(text(),'Decimal Odds')]]")).click();
  115. wait.withTimeout(Duration.ofSeconds(30));
  116. }
  117. List<WebElement> divs = driver.findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  118. System.out.println("Start Working on matches");
  119. boolean staleElementRetried = false;
  120. for (int i = 0; i < divs.size(); i++) {
  121. try {
  122. WebElement element = divs.get(i);
  123. scrollElementIntoView(driver, element);
  124. scrollElementIntoView(driver, element);
  125. ResultDTO result = new ResultDTO(SOCCER_RESULTS_TABLE_NAME);
  126. boolean somethingWrong = false;
  127. List<WebElement> subDivs = element.findElements(By.xpath("./div"));
  128. WebElement firstResultsDiv;
  129. if (subDivs.size() == 3) {
  130. WebElement competitionsDiv = subDivs.get(0);
  131. List<WebElement> links = competitionsDiv.findElements(By.xpath(".//a"));
  132. countryId = getCountryId(links);
  133. leagueId = getLeagueId(links);
  134. result.setLeagueId(leagueId);
  135. result.setCountryId(countryId);
  136. firstResultsDiv = subDivs.get(2); //.findElement(By.xpath("./a"));
  137. } else if (subDivs.size() == 1) {
  138. firstResultsDiv = subDivs.get(0); //.findElement(By.xpath("./a"));
  139. } else {
  140. System.out.println("No games found!");
  141. continue;
  142. }
  143. // WebElement firstResultsDiv = element.findElement(By.xpath("./a"));
  144. setGameDate(result, firstResultsDiv);
  145. setTeamsInfo(result, firstResultsDiv);
  146. setScoring(result, firstResultsDiv);
  147. setOdds(result, firstResultsDiv);
  148. if (!somethingWrong) {
  149. final Mysql mysql = Mysql.getInstance();
  150. result.setCountryId(countryId);
  151. result.setLeagueId(leagueId);
  152. result.setSeason(getLastSeason(leagueId, countryId));
  153. resultsToInsert.add(result);
  154. if (resultsToInsert.size() > 100) {
  155. Logger.getGlobal().log(Level.INFO, "INSERTING 100 results");
  156. mysql.addResults(resultsToInsert);
  157. resultsToInsert.clear();
  158. }
  159. }
  160. divs = driver.findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  161. } catch (StaleElementReferenceException e) {
  162. if (staleElementRetried) {
  163. System.out.println("Stale element have already been retried continueing");
  164. staleElementRetried = false;
  165. i++;
  166. }
  167. System.out.println("Retrying stale element");
  168. i--;
  169. staleElementRetried = true;
  170. }
  171. }
  172. String message = "INSERTING LAST " + resultsToInsert.size() + " results";
  173. Logger.getGlobal().log(Level.INFO, message);
  174. Mysql.getInstance().addResults(resultsToInsert);
  175. resultsToInsert.clear();
  176. driver.close();
  177. driver.quit();
  178. }
  179. private void setGameDate(ResultDTO result, WebElement firstResultsDiv) {
  180. final List<Integer> time = new ArrayList<>();
  181. String datePath = "./div/a/div/div//p";
  182. if (checkIfElementExists(firstResultsDiv, datePath)) {
  183. String timeText = firstResultsDiv.findElement(By.xpath(datePath)).getText();
  184. String[] timeSplit = timeText.split(":");
  185. if (timeSplit.length == 2) {
  186. time.add(Integer.parseInt(timeSplit[0]));
  187. time.add(Integer.parseInt(timeSplit[1]));
  188. } else {
  189. time.add(0);
  190. time.add(0);
  191. }
  192. } else {
  193. time.add(0);
  194. time.add(0);
  195. }
  196. final LocalDateTime dt = baseDate.withHour(time.get(0)).withMinute(time.get(1)).withSecond(0).withNano(0);
  197. result.setGameDate(dt);
  198. }
  199. private void setOdds(ResultDTO result, WebElement firstResultsDiv) {
  200. List<WebElement> oddsDivs = firstResultsDiv.findElements(By.xpath("./div/div"));
  201. //List<WebElement> oddsDivs = firstResultsDiv.findElements(By.xpath("./parent::*/div"));
  202. try {
  203. result.setOdds1(Float.parseFloat(oddsDivs.get(0).getText()));
  204. result.setOddsX(Float.parseFloat(oddsDivs.get(1).getText()));
  205. result.setOdds2(Float.parseFloat(oddsDivs.get(2).getText()));
  206. } catch (NumberFormatException e) {
  207. // Skip this one
  208. } catch (IndexOutOfBoundsException e) {
  209. System.out.println("Index out of bounds for result: ");
  210. System.out.println(result);
  211. }
  212. }
  213. /**
  214. * Sets the scoring for the given result based on the provided WebElement.
  215. *
  216. * @param result the ResultDTO to set the scoring for
  217. * @param firstResultsDiv the WebElement containing the scoring information
  218. */
  219. private void setScoring(ResultDTO result, WebElement firstResultsDiv) {
  220. // XPath to locate the scoring information within the firstResultsDiv
  221. String resultPath = "./div/a/div/div[2]/div/div/div/div/div";
  222. // Check if the scoring element exists within the firstResultsDiv
  223. if (checkIfElementExists(firstResultsDiv, resultPath)) {
  224. // Find the scoring divs based on the resultPath
  225. List<WebElement> scoringDivs = firstResultsDiv.findElements(By.xpath(resultPath));
  226. try {
  227. // Set the home and away scores from the scoringDivs
  228. result.setHomeScore(Integer.parseInt(scoringDivs.get(0).getText()));
  229. result.setAwayScore(Integer.parseInt(scoringDivs.get(1).getText()));
  230. } catch (NumberFormatException e) {
  231. // If the scoring information is not available, continue processing
  232. }
  233. }
  234. }
  235. private void setTeamsInfo(ResultDTO result, WebElement firstResultsDiv) {
  236. String homeTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div/div[2]//p")).getText().trim();
  237. String awayTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div/div[2]/div/div/a[2]//p")).getText().trim();
  238. result.setHomeTeam(homeTeamName);
  239. result.setAwayTeam(awayTeamName);
  240. result.setHomeTeamId(Mysql.getInstance().getOrInsertTeam(homeTeamName, countryId, leagueId, sportId));
  241. result.setAwayTeamId(Mysql.getInstance().getOrInsertTeam(awayTeamName, countryId, leagueId, sportId));
  242. }
  243. }