package parser; import com.google.common.base.Stopwatch; import com.google.common.base.Strings; import mysql.Mysql; import object.ResultDTO; import org.openqa.selenium.By; import org.openqa.selenium.StaleElementReferenceException; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.remote.RemoteWebDriver; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; import java.net.MalformedURLException; import java.net.URL; import java.time.Duration; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; public class OddsPortal extends ParserBase implements ParserJoinedFunctions { private static final String DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV = "//div[contains(@class,'tabs')" + "]/div[not(@class)]/div[1]/div"; private static final String SOCCER_RESULTS_TABLE_NAME = "SoccerResults"; private static final String DATE_PATTERN = "yyyyMMdd"; final int sportsId = Mysql.getInstance().getSportId("soccer"); private LocalDateTime baseDate; private int sportId; private int countryId; private int leagueId; private int currentLeagueId; private int currentCountryId; private List resultsToInsert = new ArrayList<>(); private String currentSeason; public void getMatches(LocalDateTime date) { baseDate = date; final String dateFormatted = date.format(DateTimeFormatter.ofPattern(DATE_PATTERN)); System.out.println("Starting to get matches at date " + dateFormatted); Stopwatch stopwatch = Stopwatch.createStarted(); getMatchesByDateSelenium(dateFormatted); System.out.println("Done with date " + dateFormatted + " took " + stopwatch.elapsed(TimeUnit.SECONDS) + " " + "seconds"); stopwatch.stop(); } private int getCountryId(List links) { String country; country = links.get(1).getText().trim(); country = country.replace(" ", "-"); country = country.replace("\\.", ""); return Mysql.getInstance().getCountryId(country); } private String getLastSeason(int leagueId, int countryId) { String result = ""; if (!Strings.isNullOrEmpty(currentSeason) && currentLeagueId == leagueId && currentCountryId == countryId) { return currentSeason; } else { result = Mysql.getInstance().getLastSeason(leagueId, countryId); } if (Strings.isNullOrEmpty(result)) { result = String.valueOf(LocalDate.now().getYear()); } return result; } private int getLeagueId(List links) { String league; league = links.get(2).getText().trim(); league = league.replace(" ", "-"); league = league.replace("\\.", ""); return Mysql.getInstance().getLeagueId(sportId, countryId, league); } private void getMatchesByDateSelenium(String date) { final String soccerUrl = "https://oddsportal.com/matches/football/" + date; try { parseSoccerMatchesSelenium(soccerUrl); } catch (MalformedURLException e) { e.printStackTrace(); } } private void parseSoccerMatchesSelenium(final String soccerUrl) throws MalformedURLException { // ChromeDriver driver = getSeleniumDriver(); // DesiredCapabilities capabilities = new DesiredCapabilities(); // capabilities.setCapability("browserName", "Chrome"); // capabilities.setPlatform(Platform.LINUX); ChromeOptions options = new ChromeOptions(); // Fixing 255 Error crashes // options.addArguments("--no-sandbox"); options.addArguments("--disable-dev-shm-usage"); // Options to trick bot detection // Removing webdriver property options.addArguments("--disable-blink-features=AutomationControlled"); options.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation")); options.setExperimentalOption("useAutomationExtension", null); // Changing the user agent / browser fingerprint options.addArguments("window-size=1920,1080"); options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like " + "Gecko) " + "Chrome/74.0.3729.169 Safari/537.36"); // Other options.addArguments("disable-infobars"); System.out.println("Getting page"); RemoteWebDriver driver = new RemoteWebDriver(new URL("http://nordh.xyz:14444/wd/hub"), options); sportId = Mysql.getInstance().getSportId("soccer"); driver.get(soccerUrl); System.out.println("Waiting for elements"); WebDriverWait wait = getWaitDriver(driver, 90); wait.until(ExpectedConditions.numberOfElementsToBeMoreThan(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV), 1)); System.out.println("Switching odds"); if (!checkIfElementExists(driver, "//p[contains(@class, 'text-orange-main')]")) { WebElement oddsFormat = driver.findElement(By.xpath("//p[contains(@class, 'text-orange-main')]")); oddsFormat.click(); driver.findElement(By.xpath("//a[span[contains(text(),'Decimal Odds')]]")).click(); wait.withTimeout(Duration.ofSeconds(30)); } List divs = driver.findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV)); System.out.println("Start Working on matches"); boolean staleElementRetried = false; for (int i = 0; i < divs.size(); i++) { try { WebElement element = divs.get(i); scrollElementIntoView(driver, element); scrollElementIntoView(driver, element); ResultDTO result = new ResultDTO(SOCCER_RESULTS_TABLE_NAME); boolean somethingWrong = false; List subDivs = element.findElements(By.xpath("./div")); WebElement firstResultsDiv; if (subDivs.size() == 3) { WebElement competitionsDiv = subDivs.get(0); List links = competitionsDiv.findElements(By.xpath(".//a")); countryId = getCountryId(links); leagueId = getLeagueId(links); result.setLeagueId(leagueId); result.setCountryId(countryId); firstResultsDiv = subDivs.get(2); //.findElement(By.xpath("./a")); } else if (subDivs.size() == 1) { firstResultsDiv = subDivs.get(0); //.findElement(By.xpath("./a")); } else { System.out.println("No games found!"); continue; } // WebElement firstResultsDiv = element.findElement(By.xpath("./a")); setGameDate(result, firstResultsDiv); setTeamsInfo(result, firstResultsDiv); setScoring(result, firstResultsDiv); setOdds(result, firstResultsDiv); if (!somethingWrong) { final Mysql mysql = Mysql.getInstance(); result.setCountryId(countryId); result.setLeagueId(leagueId); result.setSeason(getLastSeason(leagueId, countryId)); resultsToInsert.add(result); if (resultsToInsert.size() > 100) { Logger.getGlobal().log(Level.INFO, "INSERTING 100 results"); mysql.addResults(resultsToInsert); resultsToInsert.clear(); } } divs = driver.findElements(By.xpath(DIV_CONTAINS_CLASS_TABS_DIV_NOT_CLASS_DIV_1_DIV)); } catch (StaleElementReferenceException e) { if (staleElementRetried) { System.out.println("Stale element have already been retried continueing"); staleElementRetried = false; i++; } System.out.println("Retrying stale element"); i--; staleElementRetried = true; } } String message = "INSERTING LAST " + resultsToInsert.size() + " results"; Logger.getGlobal().log(Level.INFO, message); Mysql.getInstance().addResults(resultsToInsert); resultsToInsert.clear(); driver.close(); driver.quit(); } private void setGameDate(ResultDTO result, WebElement firstResultsDiv) { final List time = new ArrayList<>(); String datePath = "./div/a/div/div//p"; if (checkIfElementExists(firstResultsDiv, datePath)) { String timeText = firstResultsDiv.findElement(By.xpath(datePath)).getText(); String[] timeSplit = timeText.split(":"); if (timeSplit.length == 2) { time.add(Integer.parseInt(timeSplit[0])); time.add(Integer.parseInt(timeSplit[1])); } else { time.add(0); time.add(0); } } else { time.add(0); time.add(0); } final LocalDateTime dt = baseDate.withHour(time.get(0)).withMinute(time.get(1)).withSecond(0).withNano(0); result.setGameDate(dt); } private void setOdds(ResultDTO result, WebElement firstResultsDiv) { List oddsDivs = firstResultsDiv.findElements(By.xpath("./div/div")); //List oddsDivs = firstResultsDiv.findElements(By.xpath("./parent::*/div")); try { result.setOdds1(Float.parseFloat(oddsDivs.get(0).getText())); result.setOddsX(Float.parseFloat(oddsDivs.get(1).getText())); result.setOdds2(Float.parseFloat(oddsDivs.get(2).getText())); } catch (NumberFormatException e) { // Skip this one } catch (IndexOutOfBoundsException e) { System.out.println("Index out of bounds for result: "); System.out.println(result); } } /** * Sets the scoring for the given result based on the provided WebElement. * * @param result the ResultDTO to set the scoring for * @param firstResultsDiv the WebElement containing the scoring information */ private void setScoring(ResultDTO result, WebElement firstResultsDiv) { // XPath to locate the scoring information within the firstResultsDiv String resultPath = "./div/a/div/div[2]/div/div/div/div/div"; // Check if the scoring element exists within the firstResultsDiv if (checkIfElementExists(firstResultsDiv, resultPath)) { // Find the scoring divs based on the resultPath List scoringDivs = firstResultsDiv.findElements(By.xpath(resultPath)); try { // Set the home and away scores from the scoringDivs result.setHomeScore(Integer.parseInt(scoringDivs.get(0).getText())); result.setAwayScore(Integer.parseInt(scoringDivs.get(1).getText())); } catch (NumberFormatException e) { // If the scoring information is not available, continue processing } } } private void setTeamsInfo(ResultDTO result, WebElement firstResultsDiv) { String homeTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div/div[2]//p")).getText().trim(); String awayTeamName = firstResultsDiv.findElement(By.xpath(".//a[1]/div/div[2]/div/div/a[2]//p")).getText().trim(); result.setHomeTeam(homeTeamName); result.setAwayTeam(awayTeamName); result.setHomeTeamId(Mysql.getInstance().getOrInsertTeam(homeTeamName, countryId, leagueId, sportId)); result.setAwayTeamId(Mysql.getInstance().getOrInsertTeam(awayTeamName, countryId, leagueId, sportId)); } }