// OddsPortal.java (12 KB)
  1. package parser;
  2. import java.net.MalformedURLException;
  3. import java.net.URL;
  4. import java.time.Duration;
  5. import java.time.LocalDate;
  6. import java.time.LocalDateTime;
  7. import java.time.format.DateTimeFormatter;
  8. import java.util.ArrayList;
  9. import java.util.Collections;
  10. import java.util.List;
  11. import java.util.concurrent.TimeUnit;
  12. import java.util.logging.Level;
  13. import java.util.logging.Logger;
  14. import org.openqa.selenium.By;
  15. import org.openqa.selenium.Platform;
  16. import org.openqa.selenium.StaleElementReferenceException;
  17. import org.openqa.selenium.WebElement;
  18. import org.openqa.selenium.chrome.ChromeDriver;
  19. import org.openqa.selenium.chrome.ChromeOptions;
  20. import org.openqa.selenium.remote.DesiredCapabilities;
  21. import org.openqa.selenium.remote.RemoteWebDriver;
  22. import org.openqa.selenium.support.ui.ExpectedConditions;
  23. import org.openqa.selenium.support.ui.WebDriverWait;
  24. import com.google.common.base.Stopwatch;
  25. import com.google.common.base.Strings;
  26. import mysql.Mysql;
  27. import object.ResultDTO;
  28. public class OddsPortal extends ParserBase implements ParserJoinedFunctions {
  29. private static final String DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV = "//div[contains(@class,'tabs')]/div[not(@class)]/div[1]/div";
  30. private static final String SOCCER_RESULTS_TABLE_NAME = "SoccerResults";
  31. private static final String DATE_PATTERN = "yyyyMMdd";
  32. private LocalDateTime baseDate;
  33. private int sportId;
  34. private int countryId;
  35. private int leagueId;
  36. final int sportsId = Mysql.getInstance().getSportId("soccer");
  37. private int currentLeagueId;
  38. private int currentCountryId;
  39. private List<ResultDTO> resultsToInsert = new ArrayList<>();
  40. private String currentSeason;
  41. public void getMatches(LocalDateTime date) {
  42. baseDate = date;
  43. final String dateFormatted = date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
  44. System.out.println("Starting to get matches at date " + dateFormatted);
  45. Stopwatch stopwatch = Stopwatch.createStarted();
  46. getMatchesByDateSelenium(dateFormatted);
  47. System.out.println(
  48. "Done with date " + dateFormatted + " took " + stopwatch.elapsed(TimeUnit.SECONDS) + " seconds");
  49. }
  50. private int getCountryId(List<WebElement> links) {
  51. String country;
  52. country = links.get(1).getText().trim();
  53. country = country.replace(" ", "-");
  54. country = country.replace("\\.", "");
  55. return Mysql.getInstance().getCountryId(country);
  56. }
  57. private String getLastSeason(int leagueId, int countryId) {
  58. String result = "";
  59. if (!Strings.isNullOrEmpty(currentSeason) && currentLeagueId == leagueId && currentCountryId == countryId) {
  60. return currentSeason;
  61. } else {
  62. result = Mysql.getInstance().getLastSeason(leagueId, countryId);
  63. }
  64. if (Strings.isNullOrEmpty(result)) {
  65. result = String.valueOf(LocalDate.now().getYear());
  66. }
  67. return result;
  68. }
  69. private int getLeagueId(List<WebElement> links) {
  70. String league;
  71. league = links.get(2).getText().trim();
  72. league = league.replace(" ", "-");
  73. league = league.replace("\\.", "");
  74. return Mysql.getInstance().getLeagueId(sportId, countryId, league);
  75. }
  76. private void getMatchesByDateSelenium(String date) {
  77. final String soccerUrl = "https://oddsportal.com/matches/football/" + date;
  78. try {
  79. parseSoccerMatchesSelenium(soccerUrl);
  80. } catch (MalformedURLException e) {
  81. // TODO Auto-generated catch block
  82. e.printStackTrace();
  83. }
  84. }
  85. private void parseSoccerMatchesSelenium(final String soccerUrl) throws MalformedURLException {
  86. // ChromeDriver driver = getSeleniumDriver();
  87. // DesiredCapabilities capabilities = new DesiredCapabilities();
  88. // capabilities.setCapability("browserName", "Chrome");
  89. // capabilities.setPlatform(Platform.LINUX);
  90. ChromeOptions options = new ChromeOptions();
  91. // Fixing 255 Error crashes
  92. options.addArguments("--no-sandbox");
  93. options.addArguments("--disable-dev-shm-usage");
  94. // Options to trick bot detection
  95. // Removing webdriver property
  96. options.addArguments("--disable-blink-features=AutomationControlled");
  97. options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
  98. options.setExperimentalOption("useAutomationExtension", null);
  99. // Changing the user agent / browser fingerprint
  100. options.addArguments("window-size=1920,1080");
  101. options.addArguments(
  102. "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36");
  103. // Other
  104. options.addArguments("disable-infobars");
  105. RemoteWebDriver driver = new RemoteWebDriver(new URL("http://nordh.xyz:4444/wd/hub"), options);
  106. sportId = Mysql.getInstance().getSportId("soccer");
  107. driver.get(soccerUrl);
  108. WebDriverWait wait = getWaitDriver(driver, 90);
  109. wait.until(ExpectedConditions
  110. .numberOfElementsToBeMoreThan(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV), 1));
  111. if (!checkIfElementExists(driver, "//p[text()='EU Odds']")) {
  112. WebElement oddsFormat = driver
  113. .findElement(By.xpath("//p[@class='self-center text-xs text-orange-main'][1]"));
  114. oddsFormat.click();
  115. driver.findElement(By.xpath("//a[text()='EU Odds']")).click();
  116. wait.withTimeout(Duration.ofSeconds(30));
  117. }
  118. List<WebElement> divs = driver
  119. .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  120. boolean staleElementRetried = false;
  121. for (int i = 0; i < divs.size(); i++) {
  122. try {
  123. WebElement element = divs.get(i);
  124. scrollElementIntoView(driver, element);
  125. ResultDTO result = new ResultDTO(SOCCER_RESULTS_TABLE_NAME);
  126. boolean somethingWrong = false;
  127. List<WebElement> subDivs = element.findElements(By.xpath("./div"));
  128. if (subDivs.size() == 3) {
  129. WebElement competitionsDiv = subDivs.get(0);
  130. List<WebElement> links = competitionsDiv.findElements(By.xpath(".//a"));
  131. countryId = getCountryId(links);
  132. leagueId = getLeagueId(links);
  133. result.setLeagueId(leagueId);
  134. result.setCountryId(countryId);
  135. WebElement firstResultsDiv = subDivs.get(2);
  136. setGameDate(result, firstResultsDiv);
  137. setTeamsInfo(result, firstResultsDiv);
  138. setScoring(result, firstResultsDiv);
  139. setOdds(result, firstResultsDiv);
  140. } else if (subDivs.size() == 1) {
  141. result.setLeagueId(leagueId);
  142. result.setCountryId(countryId);
  143. WebElement firstResultsDiv = subDivs.get(0);
  144. setGameDate(result, firstResultsDiv);
  145. setTeamsInfo(result, firstResultsDiv);
  146. setScoring(result, firstResultsDiv);
  147. setOdds(result, firstResultsDiv);
  148. } else {
  149. somethingWrong = true;
  150. String message = "Subdiv size = " + subDivs.size();
  151. Logger.getGlobal().log(Level.WARNING, message);
  152. }
  153. if (!somethingWrong) {
  154. final Mysql mysql = Mysql.getInstance();
  155. result.setCountryId(countryId);
  156. result.setLeagueId(leagueId);
  157. result.setSeason(getLastSeason(leagueId, countryId));
  158. resultsToInsert.add(result);
  159. if (resultsToInsert.size() > 100) {
  160. Logger.getGlobal().log(Level.INFO, "INSERTING 100 results");
  161. mysql.addResults(resultsToInsert);
  162. resultsToInsert.clear();
  163. }
  164. }
  165. divs = driver
  166. .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  167. } catch (StaleElementReferenceException e) {
  168. if (staleElementRetried) {
  169. System.out.println("Stale element have already been retried continueing");
  170. staleElementRetried = false;
  171. i++;
  172. }
  173. System.out.println("Retrying stale element");
  174. i--;
  175. staleElementRetried = true;
  176. }
  177. }
  178. String message = "INSERTING LAST " + resultsToInsert.size()
  179. + " results";
  180. Logger.getGlobal().log(Level.INFO, message);
  181. Mysql.getInstance().addResults(resultsToInsert);
  182. resultsToInsert.clear();
  183. driver.close();
  184. driver.quit();
  185. }
  186. private void setGameDate(ResultDTO result, WebElement firstResultsDiv) {
  187. final List<Integer> time = new ArrayList<>();
  188. // String datePath = "./div/a/div[1]/div/p";
  189. String datePath = "./div/div/div/div/div/p";
  190. if (checkIfElementExists(firstResultsDiv, datePath)) {
  191. String timeText = firstResultsDiv.findElement(By.xpath(datePath)).getText();
  192. String[] timeSplit = timeText.split(":");
  193. if (timeSplit.length == 2) {
  194. time.add(Integer.parseInt(timeSplit[0]));
  195. time.add(Integer.parseInt(timeSplit[1]));
  196. } else {
  197. time.add(0);
  198. time.add(0);
  199. }
  200. } else {
  201. time.add(0);
  202. time.add(0);
  203. }
  204. final LocalDateTime dt = baseDate.withHour(time.get(0))
  205. .withMinute(time.get(1)).withSecond(0)
  206. .withNano(0);
  207. result.setGameDate(dt);
  208. }
  209. private void setOdds(ResultDTO result, WebElement firstResultsDiv) {
  210. List<WebElement> oddsDivs = firstResultsDiv.findElements(By.xpath("./div/div"));
  211. try {
  212. result.setOdds1(Float.parseFloat(oddsDivs.get(1).getText()));
  213. result.setOddsX(Float.parseFloat(oddsDivs.get(2).getText()));
  214. result.setOdds2(Float.parseFloat(oddsDivs.get(3).getText()));
  215. } catch (NumberFormatException e) {
  216. // Skip this one
  217. } catch (IndexOutOfBoundsException e) {
  218. System.out.println(result);
  219. }
  220. }
  221. private void setScoring(ResultDTO result, WebElement firstResultsDiv) {
  222. String resultPath = "./div/div/div[2]/div/div/div/div/div";
  223. if (checkIfElementExists(firstResultsDiv, resultPath)) {
  224. List<WebElement> scoringDivs = firstResultsDiv.findElements(By.xpath(resultPath));
  225. try {
  226. result.setHomeScore(Integer.parseInt(scoringDivs.get(0).getText()));
  227. result.setAwayScore(Integer.parseInt(scoringDivs.get(1).getText()));
  228. } catch (NumberFormatException e) {
  229. // There is no scoring yet continue processing
  230. }
  231. }
  232. }
  233. private void setTeamsInfo(ResultDTO result, WebElement firstResultsDiv) {
  234. String homeTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div")).getText().trim();
  235. String awayTeamName = firstResultsDiv.findElement(By.xpath(".//a[2]/div")).getText().trim();
  236. result.setHomeTeam(homeTeamName);
  237. result.setAwayTeam(awayTeamName);
  238. result.setHomeTeamId(
  239. Mysql.getInstance().getOrInsertTeam(homeTeamName, countryId, leagueId, sportId));
  240. result.setAwayTeamId(
  241. Mysql.getInstance().getOrInsertTeam(awayTeamName, countryId, leagueId, sportId));
  242. }
  243. }