|
|
@@ -0,0 +1,382 @@
|
|
|
+package parser;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.sql.SQLException;
|
|
|
+import java.time.LocalDate;
|
|
|
+import java.time.LocalDateTime;
|
|
|
+import java.time.format.DateTimeFormatter;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Locale;
|
|
|
+import java.util.logging.Level;
|
|
|
+import java.util.logging.Logger;
|
|
|
+
|
|
|
+import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
|
|
|
+import com.gargoylesoftware.htmlunit.WebClient;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlDivision;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlSpan;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlTable;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
|
|
|
+import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
|
|
|
+import com.google.common.base.Strings;
|
|
|
+
|
|
|
+import mysql.Mysql;
|
|
|
+
|
|
|
+public class OddsPortal implements ParserJoinedFunctions {
|
|
|
+
|
|
|
+ private LocalDateTime baseDate;
|
|
|
+ private int currentParsePage;
|
|
|
+ private int sportId;
|
|
|
+ private int countryId;
|
|
|
+ private int leagueId;
|
|
|
+ private LocalDateTime gameDate;
|
|
|
+
|
|
|
+ public void getYesterdaysMatches() {
|
|
|
+ baseDate = LocalDateTime.now().plusDays(-1);
|
|
|
+ final String date = LocalDate.now().plusDays(-1).format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
|
|
+ getMatchesByDate(date);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void getTodaysMatches() {
|
|
|
+ baseDate = LocalDateTime.now();
|
|
|
+ final String date = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
|
|
+ getMatchesByDate(date);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void getTomorrowsMatches() {
|
|
|
+ baseDate = LocalDateTime.now().plusDays(1);
|
|
|
+ final String dateTomorrow = LocalDate.now().plusDays(1).format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
|
|
+ getMatchesByDate(dateTomorrow);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void getNextDaysMatches() {
|
|
|
+ baseDate = LocalDateTime.now().plusDays(2);
|
|
|
+ final String dateTomorrow = LocalDate.now().plusDays(2).format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
|
|
+ getMatchesByDate(dateTomorrow);
|
|
|
+ }
|
|
|
+
|
|
|
+ // https://stackoverflow.com/questions/14439991/skip-particular-javascript-execution-in-html-unit
|
|
|
+ // Skip url
|
|
|
+ private void getMatchesByDate(String date) {
|
|
|
+ final String soccerUrl = "https://oddsportal.com/matches/soccer/" + date;
|
|
|
+ // final String hockeyUrl = "https://oddsportal.com/matches/hockey/" + date;
|
|
|
+
|
|
|
+ final WebClient webClient = new WebClient();
|
|
|
+ webClient.getOptions().setUseInsecureSSL(true);
|
|
|
+ webClient.getOptions().setCssEnabled(false);
|
|
|
+ webClient.getOptions().setJavaScriptEnabled(true);
|
|
|
+ webClient.getOptions().setThrowExceptionOnScriptError(false);
|
|
|
+ Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
|
|
|
+
|
|
|
+ webClient.waitForBackgroundJavaScript(3000);
|
|
|
+ parseSoccerMatches(soccerUrl, webClient, date);
|
|
|
+
|
|
|
+ webClient.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ private void parseSoccerMatches(final String soccerUrl, final WebClient webClient, String date) {
|
|
|
+ try {
|
|
|
+ System.out.println("Getting Webpage");
|
|
|
+ final HtmlPage soccerMatches = webClient.getPage(soccerUrl);
|
|
|
+ final HtmlTable matchesTable = soccerMatches.getFirstByXPath("//table[contains(@class, table-main)]");
|
|
|
+ final List<HtmlTableRow> rows = matchesTable.getRows();
|
|
|
+ String countryName = "";
|
|
|
+ String leagueName = "";
|
|
|
+ int i = 1;
|
|
|
+ final int size = rows.size();
|
|
|
+ for (final HtmlTableRow tr : rows) {
|
|
|
+ System.out.println("Processing " + i++ + " of " + size);
|
|
|
+ if (tr.getAttribute("class").equals("dark center")) {
|
|
|
+ final List<HtmlAnchor> countryLeague = tr.getByXPath(".//a");
|
|
|
+ countryName = countryLeague.get(0).asNormalizedText().toLowerCase().trim();
|
|
|
+ leagueName = countryLeague.get(1).asNormalizedText().toLowerCase().trim();
|
|
|
+ leagueName = leagueName.replaceAll(" ", "-");
|
|
|
+ leagueName = leagueName.replaceAll("\\.", "");
|
|
|
+ countryName = countryName.replaceAll(" ", "-");
|
|
|
+ countryName = countryName.replaceAll("\\.", "");
|
|
|
+ } else {
|
|
|
+ final List<HtmlTableCell> cells = tr.getCells();
|
|
|
+ final String[] time = cells.get(0).asNormalizedText().split(":");
|
|
|
+ final String[] teams = cells.get(1).asNormalizedText().split(" - ");
|
|
|
+ float odds1 = 0F;
|
|
|
+ float oddsX = 0F;
|
|
|
+ float odds2 = 0F;
|
|
|
+ int homeScore = -1;
|
|
|
+ int awayScore = -1;
|
|
|
+ boolean overtime = false;
|
|
|
+
|
|
|
+ boolean abandon = false;
|
|
|
+
|
|
|
+ try {
|
|
|
+ for (final HtmlTableCell tc : cells) {
|
|
|
+ if (tc.getAttribute("class").contains("live-score")) {
|
|
|
+ abandon = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ // Score
|
|
|
+ if (tc.getAttribute("class").contains("table-score")) {
|
|
|
+ final String[] scoreValue = tc.asNormalizedText().split(":");
|
|
|
+ homeScore = Integer.valueOf(scoreValue[0]);
|
|
|
+ if (scoreValue[1].matches("\\D+")) {
|
|
|
+ overtime = true;
|
|
|
+ }
|
|
|
+ awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
|
|
|
+ }
|
|
|
+ if (tc.getAttribute("class").contains("odds-nowrp")) {
|
|
|
+ if (tc.asNormalizedText().matches("[+-][0-9][0-9][0-9]")) {
|
|
|
+ if (odds1 == 0F) {
|
|
|
+ odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ } else if (oddsX == 0F) {
|
|
|
+ oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ } else if (odds2 == 0F) {
|
|
|
+ odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ }
|
|
|
+ } else if (tc.asNormalizedText().matches("[0-9].[0-9]+")) {
|
|
|
+ if (odds1 == 0F) {
|
|
|
+ odds1 = Float.valueOf(tc.asNormalizedText());
|
|
|
+ } else if (oddsX == 0F) {
|
|
|
+ oddsX = Float.valueOf(tc.asNormalizedText());
|
|
|
+ } else if (odds2 == 0F) {
|
|
|
+ odds2 = Float.valueOf(tc.asNormalizedText());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ } catch (final NumberFormatException e) {
|
|
|
+ System.out.println("Failed to get the match between " + teams[0].trim() + " and " + teams[1].trim()
|
|
|
+ + " at " + baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1]))
|
|
|
+ + " odds1: " + odds1 + " oddsX: " + oddsX + " odds2: " + odds2 + " homeScore " + homeScore
|
|
|
+ + " awayScore " + awayScore + " overtime: " + (overtime ? "true" : "false"));
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (abandon) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ final Mysql mysql = Mysql.getInstance();
|
|
|
+ final int leagueId = mysql.addLeague(leagueName, countryName, "soccer");
|
|
|
+ final int countryId = mysql.getCountryId(countryName);
|
|
|
+ final int sportId = mysql.getSportId("soccer");
|
|
|
+
|
|
|
+ // String season = mysql.getLastParsedYear(leagueName, countryId); // TODO This
|
|
|
+ // don't work
|
|
|
+ String season = String.valueOf(LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyyMMdd")).getYear());
|
|
|
+ if (Strings.isNullOrEmpty(season)) {
|
|
|
+ season = String.valueOf(LocalDateTime.now().getYear());
|
|
|
+ }
|
|
|
+
|
|
|
+ final LocalDateTime dt = baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1]))
|
|
|
+ .withSecond(0).withNano(0);
|
|
|
+ mysql.addResult("SoccerResults",
|
|
|
+ dt,
|
|
|
+ teams[0].trim(),
|
|
|
+ teams[1].trim(),
|
|
|
+ homeScore,
|
|
|
+ awayScore,
|
|
|
+ overtime,
|
|
|
+ odds1,
|
|
|
+ oddsX,
|
|
|
+ odds2,
|
|
|
+ countryId,
|
|
|
+ season,
|
|
|
+ leagueId,
|
|
|
+ sportId);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (FailingHttpStatusCodeException | IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ } catch (final SQLException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void getHistoricMatches(String sport, String country, String league, String year) {
|
|
|
+ final String url = "https://www.oddsportal.com/";
|
|
|
+ final String resultsPage = "/results";
|
|
|
+ final WebClient webClient = new WebClient();
|
|
|
+ webClient.getOptions().setUseInsecureSSL(true);
|
|
|
+ webClient.getOptions().setCssEnabled(false);
|
|
|
+ webClient.getOptions().setJavaScriptEnabled(true);
|
|
|
+ webClient.getOptions().setThrowExceptionOnScriptError(false);
|
|
|
+ Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
|
|
|
+
|
|
|
+ league = league.replaceAll(" ", "-");
|
|
|
+ league = league.replaceAll("\\.", "");
|
|
|
+ country = country.replaceAll(" ", "-");
|
|
|
+ league = league.replaceAll("\\.", "");
|
|
|
+ final Mysql mysql = Mysql.getInstance();
|
|
|
+
|
|
|
+ currentParsePage = 1;
|
|
|
+
|
|
|
+ final String urlYearPart;
|
|
|
+ if (year.equals(String.valueOf(LocalDate.now().getYear()))) {
|
|
|
+ urlYearPart = "";
|
|
|
+ } else {
|
|
|
+ urlYearPart = "-" + year;
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ sportId = mysql.getSportId(sport);
|
|
|
+ countryId = mysql.getCountryId(country);
|
|
|
+ leagueId = mysql.getLeagueId(sportId, countryId, league);
|
|
|
+ String season = "";
|
|
|
+
|
|
|
+ final HtmlPage leaguePage
|
|
|
+ = webClient.getPage(url + "/" + sport + "/" + country + "/" + league + urlYearPart + resultsPage);
|
|
|
+ final List<HtmlAnchor> yearFilter = leaguePage.getByXPath("//ul[contains(@class,'main-filter')]//a");
|
|
|
+ for (final HtmlAnchor a : yearFilter) {
|
|
|
+ System.out.println("Year filter: " + a.getHrefAttribute());
|
|
|
+ final String active = ((HtmlSpan) a.getParentNode().getParentNode()).getAttribute("class");
|
|
|
+ if (active.contains("active") && !active.contains("inactive")) {
|
|
|
+ season = a.asNormalizedText();
|
|
|
+ year = season.replace('/', '-');
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ HtmlDivision tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
|
|
|
+ HtmlTable tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
|
|
|
+
|
|
|
+ gameDate = LocalDateTime.now();
|
|
|
+ final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH);
|
|
|
+ parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
|
|
|
+ final HtmlDivision paginationLinksDiv = (HtmlDivision) tournamentTableDiv.getLastChild();
|
|
|
+ final List<HtmlAnchor> pagiantionLinks
|
|
|
+ = paginationLinksDiv.getByXPath(".//a[contains(@href, 'page') and not(.//span[contains(@class, 'arrow')])]");
|
|
|
+ for (final HtmlAnchor a : pagiantionLinks) {
|
|
|
+ System.out.println("Continuing with Pagination: " + a.getHrefAttribute());
|
|
|
+ // When done with start page click pagiantion
|
|
|
+ final int parsePage = Integer.valueOf(a.getTextContent());
|
|
|
+ if (parsePage > currentParsePage) {
|
|
|
+ a.click();
|
|
|
+ webClient.waitForBackgroundJavaScript(1000);
|
|
|
+
|
|
|
+ tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
|
|
|
+ tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
|
|
|
+ parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
|
|
|
+ currentParsePage = parsePage;
|
|
|
+ }
|
|
|
+ // process new tournament table content
|
|
|
+ }
|
|
|
+ } catch (FailingHttpStatusCodeException | IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ } catch (final SQLException sqle) {
|
|
|
+ sqle.printStackTrace();
|
|
|
+ } catch (final ClassCastException cce) {
|
|
|
+ System.out.println("No pagination table");
|
|
|
+ // cce.printStackTrace();
|
|
|
+ } finally {
|
|
|
+ Mysql.getInstance().setParsingForLeague(leagueId, sportId, countryId, gameDate, currentParsePage, year);
|
|
|
+ }
|
|
|
+ webClient.close();
|
|
|
+ System.out.println("DONE with " + country + " (" + countryId + ") league " + league + "(" + leagueId + ")");
|
|
|
+ }
|
|
|
+
|
|
|
+ private void parseTournamentTable(int sportId, int countryId, int leagueId, String season, HtmlTable tournamentTable,
|
|
|
+ LocalDateTime gameDate, DateTimeFormatter dateFormatter) throws SQLException {
|
|
|
+ for (final HtmlTableRow tr : tournamentTable.getRows()) {
|
|
|
+ if (tr.getAttribute("class").contains("deactivate")) {
|
|
|
+ String homeTeam;
|
|
|
+ String awayTeam;
|
|
|
+ int homeScore = -1;
|
|
|
+ int awayScore = -1;
|
|
|
+ float odds1 = 0f;
|
|
|
+ float oddsX = 0f;
|
|
|
+ float odds2 = 0f;
|
|
|
+ boolean overtime = false;
|
|
|
+
|
|
|
+ final HtmlTableCell timeCell = tr.getCell(0);
|
|
|
+ final HtmlTableCell participantsCell = tr.getCell(1);
|
|
|
+
|
|
|
+ // Game Time
|
|
|
+ final String[] timeValue = timeCell.asNormalizedText().split(":");
|
|
|
+ gameDate = gameDate.withHour(Integer.valueOf(timeValue[0]));
|
|
|
+ gameDate = gameDate.withMinute(Integer.valueOf(timeValue[1]));
|
|
|
+
|
|
|
+ // Teams
|
|
|
+ final String[] participantsValue = participantsCell.asNormalizedText().split(" - ");
|
|
|
+ homeTeam = participantsValue[0].trim();
|
|
|
+ awayTeam = participantsValue[1].trim();
|
|
|
+
|
|
|
+ final List<HtmlTableCell> cells = tr.getCells();
|
|
|
+ for (final HtmlTableCell tc : cells) {
|
|
|
+ // Score
|
|
|
+ if (tc.getAttribute("class").contains("table-score")) {
|
|
|
+ final String[] scoreValue = tc.asNormalizedText().split(":");
|
|
|
+ if (scoreValue[0].matches("\\D+")) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ homeScore = Integer.valueOf(scoreValue[0]);
|
|
|
+ if (scoreValue[1].matches("\\D+")) {
|
|
|
+ overtime = true;
|
|
|
+ }
|
|
|
+ awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
|
|
|
+ }
|
|
|
+
|
|
|
+ if (tc.getAttribute("class").contains("odds-nowrp")) {
|
|
|
+ if (tc.asNormalizedText().matches("[+-][0-9][0-9][0-9]")) {
|
|
|
+ if (odds1 == 0F) {
|
|
|
+ odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ } else if (oddsX == 0F) {
|
|
|
+ oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ } else if (odds2 == 0F) {
|
|
|
+ odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.asNormalizedText()));
|
|
|
+ }
|
|
|
+ } else if (tc.asNormalizedText().matches("[0-9].[0-9]+")) {
|
|
|
+ if (odds1 == 0F) {
|
|
|
+ odds1 = Float.valueOf(tc.asNormalizedText());
|
|
|
+ } else if (oddsX == 0F) {
|
|
|
+ oddsX = Float.valueOf(tc.asNormalizedText());
|
|
|
+ } else if (odds2 == 0F) {
|
|
|
+ odds2 = Float.valueOf(tc.asNormalizedText());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (gameDate != null && homeTeam != null && awayTeam != null && odds1 != 0 && oddsX != 0 && odds2 != 0
|
|
|
+ && !Strings.isNullOrEmpty(season)) { // All set.. update sql result table
|
|
|
+ System.out.println("Adding game between " + homeTeam + " and " + awayTeam + " with score " + homeScore + "-"
|
|
|
+ + awayScore);
|
|
|
+ Mysql.getInstance().addResult("SoccerResults",
|
|
|
+ gameDate,
|
|
|
+ homeTeam,
|
|
|
+ awayTeam,
|
|
|
+ homeScore,
|
|
|
+ awayScore,
|
|
|
+ overtime,
|
|
|
+ odds1,
|
|
|
+ oddsX,
|
|
|
+ odds2,
|
|
|
+ countryId,
|
|
|
+ season,
|
|
|
+ leagueId,
|
|
|
+ sportId);
|
|
|
+ } else {
|
|
|
+ System.out.println(String.format(
|
|
|
+ "Not adding, missing somethind.. gameDate: %s, homeTeam %s, awayTeam %s, odds1 %s, oddsX %s, odds2 %s, "
|
|
|
+ + "season %s",
|
|
|
+ gameDate,
|
|
|
+ homeTeam,
|
|
|
+ awayTeam,
|
|
|
+ odds1,
|
|
|
+ oddsX,
|
|
|
+ odds2,
|
|
|
+ season));
|
|
|
+ }
|
|
|
+
|
|
|
+ } else if (tr.getAttribute("class").contains("center nob-border")) { // Datum rader
|
|
|
+ final List<HtmlSpan> dateSpan = tr.getByXPath(".//span[contains(@class, 'datet')]");
|
|
|
+ final String dateString = dateSpan.get(0).asNormalizedText();
|
|
|
+ if (dateString.toLowerCase().contains("yesterday")) {
|
|
|
+ gameDate = LocalDateTime.now().minusDays(1);
|
|
|
+ } else if (dateString.toLowerCase().contains("today")) {
|
|
|
+ gameDate = LocalDateTime.now();
|
|
|
+ } else {
|
|
|
+ gameDate = LocalDate.parse(dateString, dateFormatter).atStartOfDay();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|