// OddsPortal.java
  package parser;

  import java.io.IOException;
  import java.sql.SQLException;
  import java.time.LocalDate;
  import java.time.LocalDateTime;
  import java.time.format.DateTimeFormatter;
  import java.time.format.DateTimeParseException;
  import java.util.List;
  import java.util.Locale;
  import java.util.logging.Level;
  import java.util.logging.Logger;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;

  import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
  import com.gargoylesoftware.htmlunit.WebClient;
  import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
  import com.gargoylesoftware.htmlunit.html.HtmlDivision;
  import com.gargoylesoftware.htmlunit.html.HtmlPage;
  import com.gargoylesoftware.htmlunit.html.HtmlSpan;
  import com.gargoylesoftware.htmlunit.html.HtmlTable;
  import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
  import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
  import com.google.common.base.Strings;

  import mysql.Mysql;

  22. public class OddsPortal implements ParserJoinedFunctions {
  23. private LocalDateTime baseDate;
  24. private int currentParsePage;
  25. private int sportId;
  26. private int countryId;
  27. private int leagueId;
  28. private LocalDateTime gameDate;
  29. // https://stackoverflow.com/questions/14439991/skip-particular-javascript-execution-in-html-unit Skip url
  30. public void getMatchesByDate(String date) {
  31. final String soccerUrl = "https://oddsportal.com/matches/soccer/" + date;
  32. // final String hockeyUrl = "https://oddsportal.com/matches/hockey/" + date;
  33. final WebClient webClient = new WebClient();
  34. webClient.getOptions().setUseInsecureSSL(true);
  35. webClient.getOptions().setCssEnabled(false);
  36. webClient.getOptions().setJavaScriptEnabled(true);
  37. webClient.getOptions().setThrowExceptionOnScriptError(false);
  38. Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
  39. webClient.waitForBackgroundJavaScript(3000);
  40. parseSoccerMatches(soccerUrl, webClient);
  41. webClient.close();
  42. }
  43. private void parseSoccerMatches(final String soccerUrl, final WebClient webClient) {
  44. try {
  45. System.out.println("Getting Webpage");
  46. final HtmlPage soccerMatches = webClient.getPage(soccerUrl);
  47. final HtmlTable matchesTable = soccerMatches.getFirstByXPath("//table[contains(@class, table-main)]");
  48. final List<HtmlTableRow> rows = matchesTable.getRows();
  49. String countryName = "";
  50. String leagueName = "";
  51. int i = 1;
  52. final int size = rows.size();
  53. for (final HtmlTableRow tr : rows) {
  54. System.out.println("Processing " + i++ + " of " + size);
  55. if (tr.getAttribute("class").equals("dark center")) {
  56. final List<HtmlAnchor> countryLeague = tr.getByXPath(".//a");
  57. countryName = countryLeague.get(0).getTextContent().toLowerCase().trim();
  58. leagueName = countryLeague.get(1).getTextContent().toLowerCase().trim();
  59. leagueName = leagueName.replaceAll(" ", "-");
  60. leagueName = leagueName.replaceAll("\\.", "");
  61. countryName = countryName.replaceAll(" ", "-");
  62. countryName = countryName.replaceAll("\\.", "");
  63. } else {
  64. final List<HtmlTableCell> cells = tr.getCells();
  65. final String[] time = cells.get(0).getTextContent().split(":");
  66. final String[] teams = cells.get(1).getTextContent().split(" - ");
  67. float odds1 = 0F;
  68. float oddsX = 0F;
  69. float odds2 = 0F;
  70. int homeScore = -1;
  71. int awayScore = -1;
  72. boolean overtime = false;
  73. boolean abandon = false;
  74. try {
  75. for (final HtmlTableCell tc : cells) {
  76. if (tc.getAttribute("class").contains("live-score")) {
  77. abandon = true;
  78. break;
  79. }
  80. //Score
  81. if (tc.getAttribute("class").contains("table-score")) {
  82. final String[] scoreValue = tc.getTextContent().split(":");
  83. homeScore = Integer.valueOf(scoreValue[0]);
  84. if (scoreValue[1].matches("\\D+")) {
  85. overtime = true;
  86. }
  87. awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
  88. }
  89. if (tc.getAttribute("class").contains("odds-nowrp")) {
  90. if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) {
  91. if (odds1 == 0F) {
  92. odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  93. } else if (oddsX == 0F ) {
  94. oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  95. } else if (odds2 == 0F ) {
  96. odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  97. }
  98. } else if (tc.getTextContent().matches("[0-9].[0-9]+")) {
  99. if (odds1 == 0F) {
  100. odds1 = Float.valueOf(tc.getTextContent());
  101. } else if (oddsX == 0F ) {
  102. oddsX = Float.valueOf(tc.getTextContent());
  103. } else if (odds2 == 0F ) {
  104. odds2 = Float.valueOf(tc.getTextContent());
  105. }
  106. }
  107. }
  108. }
  109. } catch (final NumberFormatException e) {
  110. System.out.println("Failed to get the match between " + teams[0].trim() + " and " + teams[1].trim() + " at " +
  111. baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])) +
  112. " odds1: " + odds1 + " oddsX: " + oddsX + " odds2: " + odds2 + " homeScore " + homeScore + " awayScore " + awayScore + " overtime: " + (overtime?"true":"false"));
  113. continue;
  114. }
  115. if (abandon) {
  116. continue;
  117. }
  118. final Mysql mysql = Mysql.getInstance();
  119. final int leagueId = mysql.addLeague(leagueName, countryName, "soccer");
  120. final int countryId = mysql.getCountryId(countryName);
  121. final int sportId = mysql.getSportId("soccer");
  122. String season = mysql.getLastParsedYear(leagueName, countryId);
  123. if (Strings.isNullOrEmpty(season)) {
  124. season = String.valueOf(LocalDateTime.now().getYear());
  125. }
  126. final LocalDateTime dt = baseDate.withHour(Integer.valueOf(time[0])).withMinute(Integer.valueOf(time[1])).withSecond(0).withNano(0);
  127. mysql.addResult("SoccerResults", dt, teams[0].trim(), teams[1].trim(), homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId);
  128. }
  129. }
  130. } catch (FailingHttpStatusCodeException | IOException e) {
  131. e.printStackTrace();
  132. } catch (final SQLException e) {
  133. e.printStackTrace();
  134. }
  135. }
  136. public void getHistoricMatches(String sport, String country, String league, String year) {
  137. final String url = "https://www.oddsportal.com/";
  138. final String resultsPage = "/results";
  139. final WebClient webClient = new WebClient();
  140. webClient.getOptions().setUseInsecureSSL(true);
  141. webClient.getOptions().setCssEnabled(false);
  142. webClient.getOptions().setJavaScriptEnabled(true);
  143. webClient.getOptions().setThrowExceptionOnScriptError(false);
  144. Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
  145. league = league.replaceAll(" ", "-");
  146. league = league.replaceAll("\\.", "");
  147. country = country.replaceAll(" ", "-");
  148. league = league.replaceAll("\\.", "");
  149. final Mysql mysql = Mysql.getInstance();
  150. currentParsePage = 1;
  151. final String urlYearPart;
  152. if (year.equals(String.valueOf(LocalDate.now().getYear()))) {
  153. urlYearPart = "";
  154. } else {
  155. urlYearPart = "-" + year;
  156. }
  157. try {
  158. sportId = mysql.getSportId(sport);
  159. countryId = mysql.getCountryId(country);
  160. leagueId = mysql.getLeagueId(sportId, countryId, league);
  161. String season = "";
  162. final HtmlPage leaguePage = webClient.getPage(url + "/" + sport + "/" + country + "/" + league + urlYearPart + resultsPage);
  163. final List<HtmlAnchor> yearFilter = leaguePage.getByXPath("//ul[contains(@class,'main-filter')]//a");
  164. for (final HtmlAnchor a : yearFilter) {
  165. System.out.println("Year filter: " + a.getHrefAttribute());
  166. final String active = ((HtmlSpan)a.getParentNode().getParentNode()).getAttribute("class");
  167. if (active.contains("active") && !active.contains("inactive")) {
  168. season = a.getTextContent();
  169. year = season.replace('/', '-');
  170. }
  171. }
  172. HtmlDivision tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
  173. HtmlTable tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
  174. gameDate = LocalDateTime.now();
  175. final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH);
  176. parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
  177. final HtmlDivision paginationLinksDiv = (HtmlDivision) tournamentTableDiv.getLastChild();
  178. final List<HtmlAnchor> pagiantionLinks = paginationLinksDiv.getByXPath(".//a[contains(@href, 'page') and not(.//span[contains(@class, 'arrow')])]");
  179. for (final HtmlAnchor a : pagiantionLinks) {
  180. System.out.println("Continuing with Pagination: " + a.getHrefAttribute());
  181. // When done with start page click pagiantion
  182. final int parsePage = Integer.valueOf(a.getTextContent());
  183. if (parsePage > currentParsePage) {
  184. a.click();
  185. webClient.waitForBackgroundJavaScript(1000);
  186. tournamentTableDiv = leaguePage.getHtmlElementById("tournamentTable");
  187. tournamentTable = (HtmlTable) tournamentTableDiv.getFirstChild();
  188. parseTournamentTable(sportId, countryId, leagueId, season, tournamentTable, gameDate, dateFormatter);
  189. currentParsePage = parsePage;
  190. }
  191. // process new tournament table content
  192. }
  193. } catch (FailingHttpStatusCodeException | IOException e) {
  194. e.printStackTrace();
  195. } catch (final SQLException sqle) {
  196. sqle.printStackTrace();
  197. } catch (final ClassCastException cce) {
  198. System.out.println("No pagination table");
  199. // cce.printStackTrace();
  200. }
  201. finally {
  202. Mysql.getInstance().setParsingForLeague(leagueId, sportId, countryId, gameDate, currentParsePage, year);
  203. }
  204. webClient.close();
  205. System.out.println("DONE with " + country + " (" + countryId + ") league " + league + "(" + leagueId + ")");
  206. }
  207. private void parseTournamentTable(int sportId, int countryId, int leagueId, String season,
  208. HtmlTable tournamentTable, LocalDateTime gameDate, DateTimeFormatter dateFormatter) throws SQLException {
  209. for (final HtmlTableRow tr : tournamentTable.getRows()) {
  210. if (tr.getAttribute("class").contains("deactivate")) {
  211. String homeTeam;
  212. String awayTeam;
  213. int homeScore = -1;
  214. int awayScore = -1;
  215. float odds1 = 0f;
  216. float oddsX = 0f;
  217. float odds2 = 0f;
  218. boolean overtime = false;
  219. final HtmlTableCell timeCell = tr.getCell(0);
  220. final HtmlTableCell participantsCell = tr.getCell(1);
  221. // Game Time
  222. final String[] timeValue = timeCell.getTextContent().split(":");
  223. gameDate = gameDate.withHour(Integer.valueOf(timeValue[0]));
  224. gameDate = gameDate.withMinute(Integer.valueOf(timeValue[1]));
  225. // Teams
  226. final String[] participantsValue = participantsCell.getTextContent().split(" - ");
  227. homeTeam = participantsValue[0].trim();
  228. awayTeam = participantsValue[1].trim();
  229. final List<HtmlTableCell> cells = tr.getCells();
  230. for (final HtmlTableCell tc : cells) {
  231. //Score
  232. if (tc.getAttribute("class").contains("table-score")) {
  233. final String[] scoreValue = tc.getTextContent().split(":");
  234. if (scoreValue[0].matches("\\D+")) {
  235. continue;
  236. }
  237. homeScore = Integer.valueOf(scoreValue[0]);
  238. if (scoreValue[1].matches("\\D+")) {
  239. overtime = true;
  240. }
  241. awayScore = Integer.valueOf(scoreValue[1].replaceAll("\\D+", ""));
  242. }
  243. if (tc.getAttribute("class").contains("odds-nowrp")) {
  244. if (tc.getTextContent().matches("[+-][0-9][0-9][0-9]")) {
  245. if (odds1 == 0F) {
  246. odds1 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  247. } else if (oddsX == 0F ) {
  248. oddsX = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  249. } else if (odds2 == 0F ) {
  250. odds2 = convertAmericanOddsToDecimal(Integer.valueOf(tc.getTextContent()));
  251. }
  252. } else if (tc.getTextContent().matches("[0-9].[0-9]+")) {
  253. if (odds1 == 0F) {
  254. odds1 = Float.valueOf(tc.getTextContent());
  255. } else if (oddsX == 0F ) {
  256. oddsX = Float.valueOf(tc.getTextContent());
  257. } else if (odds2 == 0F ) {
  258. odds2 = Float.valueOf(tc.getTextContent());
  259. }
  260. }
  261. }
  262. }
  263. if (gameDate != null && homeTeam != null && awayTeam != null &&
  264. odds1 != 0 && oddsX != 0 && odds2 != 0 && !Strings.isNullOrEmpty(season)) { // All set.. update sql result table
  265. System.out.println("Adding game between " + homeTeam + " and " + awayTeam + " with score " + homeScore + "-" + awayScore);
  266. Mysql.getInstance().addResult("SoccerResults", gameDate, homeTeam, awayTeam, homeScore, awayScore, overtime, odds1, oddsX, odds2, countryId, season, leagueId, sportId);
  267. } else {
  268. System.out.println(String.format("Not adding, missing somethind.. gameDate: %s, homeTeam %s, awayTeam %s, odds1 %s, oddsX %s, odds2 %s, "
  269. + "season %s", gameDate, homeTeam, awayTeam, odds1, oddsX, odds2, season));
  270. }
  271. } else if (tr.getAttribute("class").contains("center nob-border")) { // Datum rader
  272. final List<HtmlSpan> dateSpan = tr.getByXPath(".//span[contains(@class, 'datet')]");
  273. final String dateString = dateSpan.get(0).getTextContent();
  274. if (dateString.toLowerCase().contains("yesterday")) {
  275. gameDate = LocalDateTime.now().minusDays(1);
  276. } else if (dateString.toLowerCase().contains("today")) {
  277. gameDate = LocalDateTime.now();
  278. } else {
  279. gameDate = LocalDate.parse(dateString, dateFormatter).atStartOfDay();
  280. }
  281. }
  282. }
  283. }
  284. }