OddsPortal.java 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. package parser;
  2. import java.net.MalformedURLException;
  3. import java.net.URL;
  4. import java.time.Duration;
  5. import java.time.LocalDate;
  6. import java.time.LocalDateTime;
  7. import java.time.format.DateTimeFormatter;
  8. import java.util.ArrayList;
  9. import java.util.Collections;
  10. import java.util.List;
  11. import java.util.concurrent.TimeUnit;
  12. import java.util.logging.Level;
  13. import java.util.logging.Logger;
  14. import org.openqa.selenium.By;
  15. import org.openqa.selenium.StaleElementReferenceException;
  16. import org.openqa.selenium.WebElement;
  17. import org.openqa.selenium.chrome.ChromeOptions;
  18. import org.openqa.selenium.remote.RemoteWebDriver;
  19. import org.openqa.selenium.support.ui.ExpectedConditions;
  20. import org.openqa.selenium.support.ui.WebDriverWait;
  21. import com.google.common.base.Stopwatch;
  22. import com.google.common.base.Strings;
  23. import mysql.Mysql;
  24. import object.ResultDTO;
  25. public class OddsPortal extends ParserBase implements ParserJoinedFunctions {
  26. private static final String DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV = "//div[contains(@class,'tabs')]/div[not(@class)]/div[1]/div";
  27. private static final String SOCCER_RESULTS_TABLE_NAME = "SoccerResults";
  28. private static final String DATE_PATTERN = "yyyyMMdd";
  29. private LocalDateTime baseDate;
  30. private int sportId;
  31. private int countryId;
  32. private int leagueId;
  33. final int sportsId = Mysql.getInstance().getSportId("soccer");
  34. private int currentLeagueId;
  35. private int currentCountryId;
  36. private List<ResultDTO> resultsToInsert = new ArrayList<>();
  37. private String currentSeason;
  38. public void getMatches(LocalDateTime date) {
  39. baseDate = date;
  40. final String dateFormatted = date.format(DateTimeFormatter.ofPattern(DATE_PATTERN));
  41. System.out.println("Starting to get matches at date " + dateFormatted);
  42. Stopwatch stopwatch = Stopwatch.createStarted();
  43. getMatchesByDateSelenium(dateFormatted);
  44. System.out.println(
  45. "Done with date " + dateFormatted + " took " + stopwatch.elapsed(TimeUnit.SECONDS) + " seconds");
  46. }
  47. private int getCountryId(List<WebElement> links) {
  48. String country;
  49. country = links.get(1).getText().trim();
  50. country = country.replace(" ", "-");
  51. country = country.replace("\\.", "");
  52. return Mysql.getInstance().getCountryId(country);
  53. }
  54. private String getLastSeason(int leagueId, int countryId) {
  55. String result = "";
  56. if (!Strings.isNullOrEmpty(currentSeason) && currentLeagueId == leagueId && currentCountryId == countryId) {
  57. return currentSeason;
  58. } else {
  59. result = Mysql.getInstance().getLastSeason(leagueId, countryId);
  60. }
  61. if (Strings.isNullOrEmpty(result)) {
  62. result = String.valueOf(LocalDate.now().getYear());
  63. }
  64. return result;
  65. }
  66. private int getLeagueId(List<WebElement> links) {
  67. String league;
  68. league = links.get(2).getText().trim();
  69. league = league.replace(" ", "-");
  70. league = league.replace("\\.", "");
  71. return Mysql.getInstance().getLeagueId(sportId, countryId, league);
  72. }
  73. private void getMatchesByDateSelenium(String date) {
  74. final String soccerUrl = "https://oddsportal.com/matches/football/" + date;
  75. try {
  76. parseSoccerMatchesSelenium(soccerUrl);
  77. } catch (MalformedURLException e) {
  78. // TODO Auto-generated catch block
  79. e.printStackTrace();
  80. }
  81. }
  82. private void parseSoccerMatchesSelenium(final String soccerUrl) throws MalformedURLException {
  83. // ChromeDriver driver = getSeleniumDriver();
  84. // DesiredCapabilities capabilities = new DesiredCapabilities();
  85. // capabilities.setCapability("browserName", "Chrome");
  86. // capabilities.setPlatform(Platform.LINUX);
  87. ChromeOptions options = new ChromeOptions();
  88. // Fixing 255 Error crashes
  89. options.addArguments("--no-sandbox");
  90. options.addArguments("--disable-dev-shm-usage");
  91. // Options to trick bot detection
  92. // Removing webdriver property
  93. options.addArguments("--disable-blink-features=AutomationControlled");
  94. options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
  95. options.setExperimentalOption("useAutomationExtension", null);
  96. // Changing the user agent / browser fingerprint
  97. options.addArguments("window-size=1920,1080");
  98. options.addArguments(
  99. "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36");
  100. // Other
  101. options.addArguments("disable-infobars");
  102. RemoteWebDriver driver = new RemoteWebDriver(new URL("http://nordh.xyz:4444/wd/hub"), options);
  103. sportId = Mysql.getInstance().getSportId("soccer");
  104. driver.get(soccerUrl);
  105. WebDriverWait wait = getWaitDriver(driver, 90);
  106. wait.until(ExpectedConditions
  107. .numberOfElementsToBeMoreThan(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV), 1));
  108. if (!checkIfElementExists(driver, "//p[text()='EU Odds']")) {
  109. WebElement oddsFormat = driver
  110. .findElement(By.xpath("//p[@class='self-center text-xs text-orange-main'][1]"));
  111. oddsFormat.click();
  112. driver.findElement(By.xpath("//a[text()='EU Odds']")).click();
  113. wait.withTimeout(Duration.ofSeconds(30));
  114. }
  115. List<WebElement> divs = driver
  116. .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  117. boolean staleElementRetried = false;
  118. for (int i = 0; i < divs.size(); i++) {
  119. try {
  120. WebElement element = divs.get(i);
  121. scrollElementIntoView(driver, element);
  122. ResultDTO result = new ResultDTO(SOCCER_RESULTS_TABLE_NAME);
  123. boolean somethingWrong = false;
  124. List<WebElement> subDivs = element.findElements(By.xpath("./div"));
  125. if (subDivs.size() == 3) {
  126. WebElement competitionsDiv = subDivs.get(0);
  127. List<WebElement> links = competitionsDiv.findElements(By.xpath(".//a"));
  128. countryId = getCountryId(links);
  129. leagueId = getLeagueId(links);
  130. result.setLeagueId(leagueId);
  131. result.setCountryId(countryId);
  132. WebElement firstResultsDiv = subDivs.get(2);
  133. setGameDate(result, firstResultsDiv);
  134. setTeamsInfo(result, firstResultsDiv);
  135. setScoring(result, firstResultsDiv);
  136. setOdds(result, firstResultsDiv);
  137. } else if (subDivs.size() == 1) {
  138. result.setLeagueId(leagueId);
  139. result.setCountryId(countryId);
  140. WebElement firstResultsDiv = subDivs.get(0);
  141. setGameDate(result, firstResultsDiv);
  142. setTeamsInfo(result, firstResultsDiv);
  143. setScoring(result, firstResultsDiv);
  144. setOdds(result, firstResultsDiv);
  145. } else {
  146. somethingWrong = true;
  147. String message = "Subdiv size = " + subDivs.size();
  148. Logger.getGlobal().log(Level.WARNING, message);
  149. }
  150. if (!somethingWrong) {
  151. final Mysql mysql = Mysql.getInstance();
  152. result.setCountryId(countryId);
  153. result.setLeagueId(leagueId);
  154. result.setSeason(getLastSeason(leagueId, countryId));
  155. resultsToInsert.add(result);
  156. // System.out.println("INSERTING RESULT: " + result);
  157. //
  158. if (resultsToInsert.size() > 100) {
  159. Logger.getGlobal().log(Level.INFO, "INSERTING 100 results");
  160. // System.out.println("INSERTING RESULT " + result.getHomeTeam() + "-" + result.getAwayTeam()
  161. // + " league " + result.getLeagueId() + " score " + result.getHomeScore() + "-"
  162. // + result.getAwayScore());
  163. mysql.addResults(resultsToInsert);
  164. resultsToInsert.clear();
  165. }
  166. }
  167. divs = driver
  168. .findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV));
  169. } catch (StaleElementReferenceException e) {
  170. if (staleElementRetried) {
  171. System.out.println("Stale element have already been retried continueing");
  172. staleElementRetried = false;
  173. i++;
  174. }
  175. System.out.println("Retrying stale element");
  176. i--;
  177. staleElementRetried = true;
  178. }
  179. }
  180. String message = "INSERTING LAST " + resultsToInsert.size()
  181. + " results";
  182. Logger.getGlobal().log(Level.INFO, message);
  183. Mysql.getInstance().addResults(resultsToInsert);
  184. resultsToInsert.clear();
  185. driver.close();
  186. driver.quit();
  187. }
  188. private void setGameDate(ResultDTO result, WebElement firstResultsDiv) {
  189. final List<Integer> time = new ArrayList<>();
  190. if (checkIfElementExists(firstResultsDiv, "./div/a/div[1]/div/p")) {
  191. String timeText = firstResultsDiv.findElement(By.xpath("./div/a/div[1]/div/p")).getText();
  192. String[] timeSplit = timeText.split(":");
  193. if (timeSplit.length == 2) {
  194. time.add(Integer.parseInt(timeSplit[0]));
  195. time.add(Integer.parseInt(timeSplit[1]));
  196. } else {
  197. time.add(0);
  198. time.add(0);
  199. }
  200. } else {
  201. time.add(0);
  202. time.add(0);
  203. }
  204. final LocalDateTime dt = baseDate.withHour(time.get(0))
  205. .withMinute(time.get(1)).withSecond(0)
  206. .withNano(0);
  207. result.setGameDate(dt);
  208. }
  209. private void setOdds(ResultDTO result, WebElement firstResultsDiv) {
  210. List<WebElement> oddsDivs = firstResultsDiv.findElements(By.xpath("./div/div"));
  211. try {
  212. result.setOdds1(Float.parseFloat(oddsDivs.get(0).getText()));
  213. result.setOddsX(Float.parseFloat(oddsDivs.get(1).getText()));
  214. result.setOdds2(Float.parseFloat(oddsDivs.get(2).getText()));
  215. } catch (NumberFormatException e) {
  216. // Skip this one
  217. } catch (IndexOutOfBoundsException e) {
  218. System.out.println(result);
  219. }
  220. }
  221. private void setScoring(ResultDTO result, WebElement firstResultsDiv) {
  222. if (checkIfElementExists(firstResultsDiv, "./div/a/div[2]/div/div/div/div/div")) {
  223. List<WebElement> scoringDivs = firstResultsDiv.findElements(By.xpath("./div/a/div[2]/div/div/div/div/div"));
  224. try {
  225. result.setHomeScore(Integer.parseInt(scoringDivs.get(0).getText()));
  226. result.setAwayScore(Integer.parseInt(scoringDivs.get(2).getText()));
  227. } catch (NumberFormatException e) {
  228. // There is no scoring yet continue processing
  229. }
  230. }
  231. }
  232. private void setTeamsInfo(ResultDTO result, WebElement firstResultsDiv) {
  233. List<WebElement> teams = firstResultsDiv.findElements(By.xpath(".//div//a//div[2]//a"));
  234. String homeTeamName = teams.get(0).getText().trim();
  235. String awayTeamName = teams.get(1).getText().trim();
  236. result.setHomeTeam(homeTeamName);
  237. result.setAwayTeam(awayTeamName);
  238. result.setHomeTeamId(
  239. Mysql.getInstance().getOrInsertTeam(homeTeamName, countryId, leagueId, sportId));
  240. result.setAwayTeamId(
  241. Mysql.getInstance().getOrInsertTeam(awayTeamName, countryId, leagueId, sportId));
  242. }
  243. }