<?xml version="1.0" encoding="utf-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">INFORMATICA</journal-id>
<journal-title-group><journal-title>Informatica</journal-title></journal-title-group>
<issn pub-type="epub">1822-8844</issn><issn pub-type="ppub">0868-4952</issn><issn-l>0868-4952</issn-l>
<publisher>
<publisher-name>Vilnius University</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">INFO1198</article-id>
<article-id pub-id-type="doi">10.15388/Informatica.2018.187</article-id>
<article-categories><subj-group subj-group-type="heading">
<subject>Research Article</subject></subj-group></article-categories>
<title-group>
<article-title>A Comparison of Decision Tree Induction with Binary Logistic Regression for the Prediction of the Risk of Cardiovascular Diseases in Adult Men</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Grabauskytė</surname><given-names>Ingrida</given-names></name><email xlink:href="ingrida.grabauskyte@lsmuni.lt">ingrida.grabauskyte@lsmuni.lt</email><xref ref-type="aff" rid="j_info1198_aff_001">1</xref><xref ref-type="corresp" rid="cor1">∗</xref><bio>
<p><bold>I. Grabauskytė</bold> is a PhD student at the Department of Population Studies, Institute of Cardiology, Lithuanian University of Health Sciences. She is a lecturer of biostatistics at the university. Her current research focus is on statistics and medical data analysis.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Tamošiūnas</surname><given-names>Abdonas</given-names></name><email xlink:href="abdonas.tamosiunas@lsmuni.lt">abdonas.tamosiunas@lsmuni.lt</email><xref ref-type="aff" rid="j_info1198_aff_001">1</xref><bio>
<p><bold>A. Tamošiūnas</bold>, Prof. Dr. Habil., head of laboratory, head researcher in the Department of Population Studies, Institute of Cardiology, Lithuanian University of Health Sciences. The field of research – epidemiology and primary prevention of cardiovascular disease.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Kavaliauskas</surname><given-names>Mindaugas</given-names></name><email xlink:href="m.kavaliauskas@ktu.lt">m.kavaliauskas@ktu.lt</email><xref ref-type="aff" rid="j_info1198_aff_002">2</xref><bio>
<p><bold>M. Kavaliauskas</bold>, Dr. is a lecturer at Kaunas University of Technology. He is giving lectures on mathematical statistics, time series analysis and data mining. His field of scientific research is methods of multivariate data analysis.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Radišauskas</surname><given-names>Ričardas</given-names></name><email xlink:href="ricardas.radisauskas@lsmuni.lt">ricardas.radisauskas@lsmuni.lt</email><xref ref-type="aff" rid="j_info1198_aff_001">1</xref><bio>
<p><bold>R. Radišauskas</bold>, Prof. Dr., senior researcher in the Department of Population Studies, Institute of Cardiology, Lithuanian University of Health Sciences. The field of research – epidemiology and primary prevention of cardiovascular disease.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Bernotienė</surname><given-names>Gailutė</given-names></name><email xlink:href="gailute.bernotiene@lsmuni.lt">gailute.bernotiene@lsmuni.lt</email><xref ref-type="aff" rid="j_info1198_aff_001">1</xref><bio>
<p><bold>G. Bernotienė</bold>, Assoc. Prof. Dr., senior researcher in the Department of Population Studies, Institute of Cardiology, Lithuanian University of Health Sciences. The field of research – epidemiology and primary prevention of cardiovascular disease.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Janilionis</surname><given-names>Vytautas</given-names></name><email xlink:href="vytautas.janilionis@ktu.lt">vytautas.janilionis@ktu.lt</email><xref ref-type="aff" rid="j_info1198_aff_002">2</xref><bio>
<p><bold>V. Janilionis</bold> is an associate professor at the Department of Applied Mathematics, Kaunas University of Technology. He received a PhD degree (Technical cybernetics and information theory) in 1989 from the Kaunas Polytechnic Institute, Lithuania. His major research interests include statistical data analysis, system modelling, identification and control, data mining methods and applications.</p></bio>
</contrib>
<aff id="j_info1198_aff_001"><label>1</label>Institute of Cardiology, Medical Academy, <institution>Lithuanian University of Health Sciences</institution>, Sukilėliu̧ pr. 15, LT-50162 Kaunas, <country>Lithuania</country></aff>
<aff id="j_info1198_aff_002"><label>2</label>Faculty of Mathematics and Natural Sciences, <institution>Kaunas University of Technology</institution>, Studentu̧ g. 50, LT-51368 Kaunas, <country>Lithuania</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>∗</label>Corresponding author.</corresp>
</author-notes>
<pub-date pub-type="ppub"><year>2018</year></pub-date><pub-date pub-type="epub"><day>1</day><month>1</month><year>2018</year></pub-date><volume>29</volume><issue>4</issue><fpage>675</fpage><lpage>692</lpage><history><date date-type="received"><month>6</month><year>2017</year></date><date date-type="accepted"><month>7</month><year>2018</year></date></history>
<permissions><copyright-statement>© 2018 Vilnius University</copyright-statement><copyright-year>2018</copyright-year>
<license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>Open access article under the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">CC BY</ext-link> license.</license-p></license></permissions>
<abstract>
<p>The main purpose of this article was to compare traditional binary logistic regression analysis with decision tree analysis for the evaluation of the risk of cardiovascular diseases in adult men living in the city. Patients and methods. In our study, we used data from the Multifactorial Ischemic Heart Disease Prevention Study (MIHDPS). In the MIHDPS study, a random sample of male inhabitants of Kaunas city (Lithuania) aged 40–59 years was examined between 1977 and 1980. We analysed a sample of 5626 men. Taking blood pressure lowering medicine, disability, intermittent claudication, regular smoking, a higher value of the body mass index, systolic blood pressure, age, total serum cholesterol, and walking in winter were associated with a higher probability of ischemic heart disease or cardiovascular diseases. Having more siblings and drinking alcohol were associated with a lower probability of these diseases. The binary logistic regression method showed a very slightly lower level of errors than the decision tree did (the difference between the two methods was 2.04% for ischemic heart disease (IHD) and 2.86% for cardiovascular disease (CVD)), but for consumers, the results of the decision tree are easier to understand and interpret. Both of these methods are appropriate to analyse cardiovascular disease data.</p>
</abstract>
<kwd-group>
<label>Key words</label>
<kwd>logistic regression</kwd>
<kwd>decision tree</kwd>
<kwd>ischemic heart disease</kwd>
<kwd>cardiovascular disease</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="j_info1198_s_001">
<label>1</label>
<title>Introduction</title>
<p>High mortality from cardiovascular diseases (CVD) is a major health problem in the Lithuanian male population. During the last decades, an increasing trend of CVD mortality was observed in Lithuanian men, reaching 728.9 deaths per 100000 population in 2015 and being one of the highest in Europe (Lithuanian Ministry of Health, <xref ref-type="bibr" rid="j_info1198_ref_014">2016</xref>). Epidemiological studies have demonstrated that the prevalence of conventional CVD risk factors is also very high in the Lithuanian population (Rėklaitienė <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_019">2012</xref>). Prognostic values of these risk factors for the development of CVD in Lithuania have been found to be comparable to those in other populations (Tamošiūnas <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_026">2014</xref>; Kuzmickienė <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_013">2013</xref>). However, the impact of specific lifestyle and biological risk factors on the prediction of mortality from CVD – and especially the prediction of the risk of CVD morbidity – is still underestimated not only in Lithuania, but in other Baltic countries as well.</p>
<p>Regression analysis and classification can be performed using a popular statistical learning method called recursive partitioning (Kerdprasop and Kittisak, <xref ref-type="bibr" rid="j_info1198_ref_011">2011</xref>; Strobl <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_025">2009</xref>; Hothorn <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_008">2006</xref>). Problems related to the analysis of data on health and the risk of mortality and morbidity could also be solved by other modern methods of statistical analysis, such as artificial neural networks, support vector machines, ensemble methods employing bagging and boosting algorithms – but recursive partitioning has a distinct feature. In contrast to many “black box” methods in which the internal logic can be difficult to work out, recursive partitioning offers a result as a simple human readable representation having a shape of a tree (Jing, <xref ref-type="bibr" rid="j_info1198_ref_010">2013</xref>; Breiman <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_002">1984</xref>; Zhao <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_029">2016</xref>). Therefore, these methods are also called <italic>decision trees</italic>. Statistical data analysis techniques usually put some restrictions on the sample: normality, homoscedasticity, independence, etc. Hypothesis testing is a common step performed before the application of some statistical methods. The model is considered valid if these assumptions are satisfied.</p>
<p>This approach is not used in data mining and machine learning algorithms. Nevertheless, there is a need to validate the results using these techniques as well. Cross-validation (CV) is a common accuracy assessment technique for machine learning algorithms (Han <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_007">2012</xref>). CV is used to estimate the precision of the models.</p>
<p>Logistic regression is the most common method used to model CVD. The main purpose of this article was to compare the traditional binary logistic regression (LR) analysis with the decision tree (DT) analysis for the evaluation of the risk of CVD in adult men living in the city. For this purpose, we selected the conditional inference tree method which is not often used for comparison.</p>
</sec>
<sec id="j_info1198_s_002" sec-type="materials|methods">
<label>2</label>
<title>Materials and Methods</title>
<sec id="j_info1198_s_003">
<label>2.1</label>
<title>Study Population</title>
<p>In our study, we used data from the Multifactorial Ischemic Heart Disease Prevention Study (MIHDPS). In the MIHDPS study, a random sample of male inhabitants of Kaunas city (Lithuania) aged 40–59 years was examined (between 1977 and 1980). The initial survey included 5933 men (participation rate – 69.8%). We excluded 307 men because of duplicates or incomplete information on variables used in the current analysis. The final number of participants included in the current analysis was 5626. The same sample was used for both logistic regression and decision tree models.</p>
<p>This study was based on voluntary, informed participation. The participants did not provide written consent prior to the baseline examination, as this was not required in the former Soviet Union. The participants’ records and information were anonymized and de-identified prior to the analysis.</p>
<p>In this article, the conditional inference tree implemented in the <italic>party</italic> package of R statistical software will be used. Men with ischemic heart disease (IHD) (previous myocardial infarction (MI), angina pectoris, and ischemic changes in the electrocardiogram) or cardiovascular disease (IHD + stroke and intermittent claudication) were assigned to the case group, and the remaining subjects were assigned to the control group (men without IHD or CVD).</p>
</sec>
<sec id="j_info1198_s_004">
<label>2.2</label>
<title>Measurements</title>
<p>Data were collected using a standard protocol and uniform methods of measurement. All participants underwent physical examination (total cholesterol level, blood pressure (BP), height, and weight measurements). BP was measured on the right brachial artery using a mercury sphygmomanometer and appropriately sized arm cuffs in the sitting position after 5 minutes of rest. The measurements were performed to the nearest 2 mmHg. The first Korotkoff phase was recorded as systolic BP, and the fifth Korotkoff phase was used to determine diastolic BP. The average of two measurements was used in the analysis. The height of the participants was measured with a stadiometer, approximating the measurements to the nearest centimeter. Weight was measured with standardized medical scales, with the patient wearing no shoes or heavy clothes, and the measurements were approximated to the nearest 0.1 kg. The body mass index (BMI) was calculated as weight in kilograms divided by height in meters squared (kg/m<inline-formula id="j_info1198_ineq_001"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula>).</p>
<p>Fasting serum samples were analyzed in the Laboratory of the Institute of Cardiology, the Lithuanian University of Health Sciences. Total serum cholesterol concentration was measured by applying the method proposed by Huang <italic>et al.</italic> (<xref ref-type="bibr" rid="j_info1198_ref_009">1961</xref>). Fasting glucose concentration was directly determined in serum by using the ortho-toluidine technique (Glasunov <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_006">1981</xref>).</p>
<p>A standard questionnaire was applied to obtain data on the respondents’ age, physical activity, smoking status, alcohol consumption, the use of antihypertensive, lipid-lowering, or antidiabetic medications, and family history of CVD. Physical activity was assessed by hours spent for moderate physical activity (walking, standing, and sitting) per working day and hours per week spent for this activity during leisure time. The respondents were categorized into two groups according to their level of physical activity during working days and during leisure time: active (⩾10 hours/week) and inactive (&lt;10 hours/week). According to the frequency of alcohol consumption, the respondents were classified into six groups: never or former drinkers, those consuming alcoholic beverages less frequently than once per month, 1–3 times per month, once per week, 2–3 times per week, several times per week, or daily. We also grouped the participants into two groups according to the reported frequency of alcohol consumption: never-drinkers or former drinkers, and drinkers. According to the smoking habits, the participants were categorized as never-smokers, those smoking sometimes but not every day, daily smokers and quitters.</p>
<p>IHD at baseline was determined by: 1) a documented history of MI and/or ischemic changes on electrocardiogram (ECG) coded by Minnesota codes (MC) 1-1 or 1-2 (Prineas <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_017">1982</xref>); 2) angina pectoris as defined by G. Rose’s questionnaire (without MI and/or MC 1-1 or 1-2) (Rose <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_020">1982</xref>); 3) ECG findings coded by MC 1-3, 4-1, 4-2, 4-3, 5-1, 5-2, 5-3, 6-1, 6-2, 7-1, or 8-3 (without MI and/or MC 1-1, 1-2 and without angina pectoris). Previous stroke was determined on the basis of the documented history of stroke.</p>
<p>The information on family history of MI, stroke, and sudden death was evaluated among first-degree relatives only: parents (father and mother) and siblings (brothers and/or sisters). The following questions were asked: “Did your father ever experience MI (stroke or sudden death)?”, “Did your mother ever experience MI (stroke or sudden death)?”, “Did your brother ever experience MI (stroke or sudden death)?”, and “Did your sister ever experience MI (stroke or sudden death)?” We also used a combined family history variable – family history of CVD – which included a history of MI and/or stroke and/or sudden death in at least one of the parents or siblings. According to the family history variable, the participants were categorized as having one or more parents or siblings with CVD and those without any first-degree relatives with CVD.</p>
</sec>
<sec id="j_info1198_s_005">
<label>2.3</label>
<title>Statistical Analysis</title>
<p>Firstly, we calculated descriptive statistics of the variables. Quantitative variables were described as median, minimum, and maximum because variable distributions did not satisfy the normality assumption (Kolmogorov–Smirnov test). The nonparametric Mann–Whitney U test was used to determine differences in the distributions of continuous variables between control (men without IHD or CVD) and case (men with IHD or CVD) groups. Qualitative variables were described using frequencies. The Chi-squared test was used to determine differences in categorical variables between the control and the case groups. Univariate and multivariate binary logistic regression analyses were used.</p>
</sec>
<sec id="j_info1198_s_006">
<label>2.4</label>
<title>Logistic Regression</title>
<p>Binary logistic regression analysis is a non-linear regression technique that assumes that the expected probability of a binary outcome is: 
<disp-formula id="j_info1198_eq_001">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mi mathvariant="italic">P</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">Y</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">β</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">β</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">β</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo stretchy="false">⋯</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">β</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ P(Y=1)=\frac{1}{1+{e^{-({\beta _{0}}+{\beta _{1}}{X_{1}}+{\beta _{2}}{X_{2}}+\cdots +{\beta _{n}}{X_{n}})}}},\]]]></tex-math></alternatives>
</disp-formula> 
where the <inline-formula id="j_info1198_ineq_002"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${X_{n}}$]]></tex-math></alternatives></inline-formula> are variables with numeric values (if binary, they are zero for control and one for case) and the <inline-formula id="j_info1198_ineq_003"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">β</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\beta _{n}}$]]></tex-math></alternatives></inline-formula> are the regression coefficients that quantify their contribution to the probability (Long <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_015">1993</xref>). Univariate binary logistic regression analysis was performed to identify the impact of clinical and lifestyle factors on the prevalence of IHD and CVD. Multiple logistic regression analysis was used to build the final model. Variable selection for multiple logistic regression model was performed using bidirectional <italic>stepwise</italic> procedure based on Akaike information criterion (AIC) (Akaike, <xref ref-type="bibr" rid="j_info1198_ref_001">1973</xref>).</p>
<p>Comparisons were expressed as odds ratios and 95% confidence intervals (95% CI) and Akaike information criterion: <inline-formula id="j_info1198_ineq_004"><alternatives><mml:math>
<mml:mi mathvariant="italic">AIC</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">k</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo movablelimits="false">ln</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">L</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$\mathit{AIC}=2k-2\ln (L)$]]></tex-math></alternatives></inline-formula>, where <italic>L</italic> is the maximized value of the likelihood function and <italic>k</italic> is the number of estimated parameters in the model (Akaike, <xref ref-type="bibr" rid="j_info1198_ref_001">1973</xref>).</p>
<p>A nonparametric receiver operating characteristic curve (ROC) was used to determine the discriminatory power of the represented model. A probability level of <italic>p</italic>-value &lt; 0.05 was taken as statistically significant.</p>
</sec>
<sec id="j_info1198_s_007">
<label>2.5</label>
<title>Decision Tree and Cross-Validation</title>
<p>Recursive partitioning is a greedy algorithm that splits data into partitions based on the values of a single variable. The splitting process is repeated recursively until some stop condition is satisfied, thus producing a tree-shaped model. If the number of the covariates is large enough, they also allow for producing a full-length tree, ending with a tree that contains a single class observation in each leaf. These trees have overfitting problems. An additional pruning procedure is necessary (Strobl <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_025">2009</xref>).</p>
<p>Many recursive partitioning methods have a selection bias towards covariates with many possible splits and the problem of overfitting. The latter problem could be solved by using the tree pruning procedure. We used recursive partitioning (implemented in the <italic>party</italic> package of R statistical software) based on conditional inference procedures. The selected method is based on a well-defined theory. It performs unbiased splitting selection and implements stop conditions based on the significance of the association between covariates and the response, thus eliminating overfitting and the need for tree pruning (Hothorn <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_008">2006</xref>). In this regard, the selected conditional inference tree method is superior to other DT methods (e.g. CART, C4.5, C5.0, ID3).</p>
<p>There are many descriptions of the decision tree structure. Presented in short, the decision tree structure is the following. The topmost decision node in a tree is called the <italic>root node</italic>; it corresponds to the best predictor. The final nodes are called <italic>terminal nodes</italic> or <italic>leaves</italic>. Every node, except for terminal nodes, contains a test condition and splits data into two or more subsets based on the result of the test condition. Conditions are selected for the maximum separation of positive and negative classes in branches of the node. This procedure is applied recursively, thus creating possibly a good separation of the classes in the terminal nodes. The numbers after the predicted class for the terminal node indicate the probabilities of each class and allow one to see <italic>the probability of the winning class</italic>, that is, the factor that determines the final classification (WebFOCUS RStat, <xref ref-type="bibr" rid="j_info1198_ref_027">2011</xref>).</p>
<p>The basic idea of cross-validation (e.g. Stone (<xref ref-type="bibr" rid="j_info1198_ref_024">1974</xref>); Geisser (<xref ref-type="bibr" rid="j_info1198_ref_005">1975</xref>)) is splitting data into subsets, where some subsets are used for fitting the model, and the rest of the data are used for the estimation of the prediction error. Statistical properties of CV are then analysed – e.g. leave-one-out CV is asymptotically equivalent to the Akaike Information Criterion (Akaike, <xref ref-type="bibr" rid="j_info1198_ref_001">1973</xref>). Leave-one-out CV splits the data into the smallest subsets containing a single observation. One observation is used for model error estimation, and the rest of the data are used for fitting the model. This procedure is repeated for every observation. Leave-one-out CV is a computationally intensive method. Another popular CV algorithm is <italic>k</italic>-fold CV. Data are split into equal-size <italic>k</italic> subsets (folds). A single subset is used for error estimation, and the rest are used for fitting the model. Leave-one-out CV can be considered a special case of <italic>k</italic>-fold CV in which the number of folds is equal to the sample size (Schneider, <xref ref-type="bibr" rid="j_info1198_ref_021">1997</xref>). In this paper, 10-fold CV is used to estimate the prediction error for both logistic regression and decision tree models. Value <inline-formula id="j_info1198_ineq_005"><alternatives><mml:math>
<mml:mi mathvariant="italic">k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>10</mml:mn></mml:math><tex-math><![CDATA[$k=10$]]></tex-math></alternatives></inline-formula> is a common choice recommended by a number of authors (e.g. Han <italic>et al.</italic> (<xref ref-type="bibr" rid="j_info1198_ref_007">2012</xref>); Witten <italic>et al.</italic> (<xref ref-type="bibr" rid="j_info1198_ref_028">2011</xref>)).</p>
</sec>
</sec>
<sec id="j_info1198_s_008">
<label>3</label>
<title>Results</title>
<p>Clinical characteristics of the patients are presented in Table <xref rid="j_info1198_tab_001">1</xref>. Glucose level after a 2-hour load did not differ significantly between patients with and without CVD (<italic>p</italic>-value = 0.074). Smoking at present and physical activity in the summer did not differ significantly between patients with and without IHD (<italic>p</italic>-values: 0.114 and 0.069, respectively). The proportions of MI and diabetes in the subjects’ mothers did not differ significantly between patients with and without CVD (<italic>p</italic>-values: 0.066 and 0.167, respectively) or IHD (<italic>p</italic>-values: 0.062 and 0.195, respectively) either. All the remaining variables differed statistically significantly between the case and the control groups (all <italic>p</italic>-values were smaller than 0.05).</p>
<table-wrap id="j_info1198_tab_001">
<label>Table 1</label>
<caption>
<p>Clinical characteristics of the patients with and without IHD or CVD.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin"/>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Ischemic heart disease (IHD)</td>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Cardiovascular disease (CVD)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Variable</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Yes <inline-formula id="j_info1198_ineq_006"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>612</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(N=612)$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">No <inline-formula id="j_info1198_ineq_007"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>5014</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(N=5014)$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><italic>p</italic>-value</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Yes <inline-formula id="j_info1198_ineq_008"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>674</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(N=674)$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">No <inline-formula id="j_info1198_ineq_009"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>4952</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(N=4952)$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><italic>p</italic>-value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Systolic blood pressure (SBP), mm Hg, median (min; max)</td>
<td style="vertical-align: top; text-align: right">141 (93; 240)</td>
<td style="vertical-align: top; text-align: right">133 (88; 236)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<inline-formula id="j_info1198_ineq_010"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{a}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: right">141 (93; 240)</td>
<td style="vertical-align: top; text-align: right">133 (88; 236)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diastolic blood pressure (DBP), mm Hg, median (min; max)</td>
<td style="vertical-align: top; text-align: right">90 (56; 150)</td>
<td style="vertical-align: top; text-align: right">86 (47; 142)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">90 (56; 150)</td>
<td style="vertical-align: top; text-align: right">86 (47; 142)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – triceps (STT), mm, median (min; max)</td>
<td style="vertical-align: top; text-align: right">10.2 (1; 38.2)</td>
<td style="vertical-align: top; text-align: right">9.8 (1; 39.4)</td>
<td style="vertical-align: top; text-align: right">0.006<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">10.2 (1; 38.2)</td>
<td style="vertical-align: top; text-align: right">9.8 (1; 39.4)</td>
<td style="vertical-align: top; text-align: right">0.004<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – scapula (STS), mm, median (min; max)</td>
<td style="vertical-align: top; text-align: right">17.2 (2.4; 40)</td>
<td style="vertical-align: top; text-align: right">15.4 (1.6; 40)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">17.2 (2.4; 40)</td>
<td style="vertical-align: top; text-align: right">15.4 (1.6; 40)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Serum cholesterol (Cholesterol), mmol/L, median (min; max)</td>
<td style="vertical-align: top; text-align: right">6.1 (2.8; 10.2)</td>
<td style="vertical-align: top; text-align: right">5.9 (1.4; 13.2)</td>
<td style="vertical-align: top; text-align: right">0.014<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">6.1 (2.8; 12.7)</td>
<td style="vertical-align: top; text-align: right">5.9 (1.4; 13.2)</td>
<td style="vertical-align: top; text-align: right">0.002<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Glucose level after 2-hour load (Glucose), mmol/L,</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">7.5 (2.4; 19.3)</td>
<td style="vertical-align: top; text-align: right">7.2 (1.8; 19.9)</td>
<td style="vertical-align: top; text-align: right">0.010<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">7.4 (2.4; 19.3)</td>
<td style="vertical-align: top; text-align: right">7.2 (1.8; 19.9)</td>
<td style="vertical-align: top; text-align: right">0.074<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother alive (MA), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">351 (57.35)</td>
<td style="vertical-align: top; text-align: right">2522 (50.30)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">388 (57.57)</td>
<td style="vertical-align: top; text-align: right">2485 (50.18)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">257 (41.99)</td>
<td style="vertical-align: top; text-align: right">2454 (48.94)</td>
<td style="vertical-align: top; text-align: right">0.004<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">282 (41.84)</td>
<td style="vertical-align: top; text-align: right">2429 (49.05)</td>
<td style="vertical-align: top; text-align: right">0.002<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">4 (0.65)</td>
<td style="vertical-align: top; text-align: right">38 (0.76)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">4 (0.59)</td>
<td style="vertical-align: top; text-align: right">38 (0.77)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother’s myocardial infarction (MMI), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">569 (92.97)</td>
<td style="vertical-align: top; text-align: right">4747 (94.67)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">628 (93.18)</td>
<td style="vertical-align: top; text-align: right">4688 (94.67)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">26 (4.25)</td>
<td style="vertical-align: top; text-align: right">130 (2.59)</td>
<td style="vertical-align: top; text-align: right">0.062<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">28 (4.15)</td>
<td style="vertical-align: top; text-align: right">128 (2.58)</td>
<td style="vertical-align: top; text-align: right">0.066<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">17 (2.78)</td>
<td style="vertical-align: top; text-align: right">137 (2.73)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">18 (2.67)</td>
<td style="vertical-align: top; text-align: right">136 (2.75)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of brothers and sisters at present (NBSP),</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">2 (0; 12)</td>
<td style="vertical-align: top; text-align: right">2 (0; 16)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">2 (0; 12)</td>
<td style="vertical-align: top; text-align: right">2 (0; 16)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Increased blood pressure (IBP), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">414 (67.65)</td>
<td style="vertical-align: top; text-align: right">4095 (81.67)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">458 (67.95)</td>
<td style="vertical-align: top; text-align: right">4051 (81.81)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">198 (32.35)</td>
<td style="vertical-align: top; text-align: right">919 (18.33)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">216 (32.05)</td>
<td style="vertical-align: top; text-align: right">901 (18.19)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Blood pressure-lowering medicine (MLBP), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">505 (82.52)</td>
<td style="vertical-align: top; text-align: right">4709 (93.92)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">555 (82.34)</td>
<td style="vertical-align: top; text-align: right">4659 (94.08)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">107 (17.48)</td>
<td style="vertical-align: top; text-align: right">305 (6.08)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">119 (17.66)</td>
<td style="vertical-align: top; text-align: right">293 (5.92)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Last intake of medicine (LMWA), weeks ago,</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">0 (0; 9)</td>
<td style="vertical-align: top; text-align: right">0 (0; 9)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">0 (0; 9)</td>
<td style="vertical-align: top; text-align: right">0 (0; 9)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability group (Disability), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">536 (87.58)</td>
<td style="vertical-align: top; text-align: right">4797 (95.67)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">582 (86.35)</td>
<td style="vertical-align: top; text-align: right">4751 (95.94)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">76 (12.42)</td>
<td style="vertical-align: top; text-align: right">217 (4.33)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">92 (13.65)</td>
<td style="vertical-align: top; text-align: right">201 (4.06)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking habits (Smoking), n (%): <italic>Never smoked</italic></td>
<td style="vertical-align: top; text-align: right">161 (26.31)</td>
<td style="vertical-align: top; text-align: right">1516 (30.24)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">173 (25.67)</td>
<td style="vertical-align: top; text-align: right">1504 (30.37)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Not every day</italic></td>
<td style="vertical-align: top; text-align: right">15 (2.45)</td>
<td style="vertical-align: top; text-align: right">135 (2.69)</td>
<td style="vertical-align: top; text-align: right">0.114<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">16 (2.37)</td>
<td style="vertical-align: top; text-align: right">134 (2.71)</td>
<td style="vertical-align: top; text-align: right">0.032<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Regular smokers and quitters</italic></td>
<td style="vertical-align: top; text-align: right">436 (71.24)</td>
<td style="vertical-align: top; text-align: right">3363 (67.07)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">485 (71.96)</td>
<td style="vertical-align: top; text-align: right">3314 (66.92)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol drinking (Alcohol), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">89 (14.54)</td>
<td style="vertical-align: top; text-align: right">385 (7.68)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">106 (15.73)</td>
<td style="vertical-align: top; text-align: right">368 (7.43)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">523 (85.46)</td>
<td style="vertical-align: top; text-align: right">4629 (92.32)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">568 (84.27)</td>
<td style="vertical-align: top; text-align: right">4584 (92.57)</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of working days per week (NWDPW),</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">5 (0; 7)</td>
<td style="vertical-align: top; text-align: right">5 (0; 7)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">5 (0; 7)</td>
<td style="vertical-align: top; text-align: right">5 (0; 7)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in summer (WSHPW), hours per week,</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">6 (0; 30)</td>
<td style="vertical-align: top; text-align: right">4 (0; 32)</td>
<td style="vertical-align: top; text-align: right">0.004<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">5.5 (0; 30)</td>
<td style="vertical-align: top; text-align: right">4 (0; 32)</td>
<td style="vertical-align: top; text-align: right">0.002<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in winter (WWHPW), hours per week,</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">median (min; max)</td>
<td style="vertical-align: top; text-align: right">4 (0; 30)</td>
<td style="vertical-align: top; text-align: right">3 (0; 30)</td>
<td style="vertical-align: top; text-align: right">0.002<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">4 (0; 30)</td>
<td style="vertical-align: top; text-align: right">3 (0; 30)</td>
<td style="vertical-align: top; text-align: right">0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Physical activity in summer (PASHPW),</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">hours per week, median (min; max)</td>
<td style="vertical-align: top; text-align: right">7 (0; 30)</td>
<td style="vertical-align: top; text-align: right">7 (0; 32)</td>
<td style="vertical-align: top; text-align: right">0.069<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">7 (0; 30)</td>
<td style="vertical-align: top; text-align: right">7 (0; 32)</td>
<td style="vertical-align: top; text-align: right">0.024<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">BMI, kg/m<inline-formula id="j_info1198_ineq_011"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula>, median (min; max)</td>
<td style="vertical-align: top; text-align: right">28.2 (17.6; 42.9)</td>
<td style="vertical-align: top; text-align: right">27.1 (17; 47.6)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">28.2 (17.6; 42.9)</td>
<td style="vertical-align: top; text-align: right">27.1 (14; 47.6)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Age, years, median (min; max)</td>
<td style="vertical-align: top; text-align: right">51.6 (39.7; 62.8)</td>
<td style="vertical-align: top; text-align: right">49.1 (38.6; 61.9)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
<td style="vertical-align: top; text-align: right">51.8 (39.7; 62.8)</td>
<td style="vertical-align: top; text-align: right">49.1 (38.6; 61.9)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>a</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Intermittent claudication (Claudication), n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">596 (97.39)</td>
<td style="vertical-align: top; text-align: right">4976 (99.24)</td>
<td style="vertical-align: top; text-align: right">&lt;0.001<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">–</td>
<td style="vertical-align: top; text-align: right">–</td>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">16 (2.61)</td>
<td style="vertical-align: top; text-align: right">38 (0.76)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diabetes, n (%): <italic>No</italic></td>
<td style="vertical-align: top; text-align: right">602 (98.37)</td>
<td style="vertical-align: top; text-align: right">4965 (99.02)</td>
<td style="vertical-align: top; text-align: right">0.195<sup>b</sup></td>
<td style="vertical-align: top; text-align: right">663 (98.37)</td>
<td style="vertical-align: top; text-align: right">4904 (99.03)</td>
<td style="vertical-align: top; text-align: right">0.167<sup>b</sup></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">10 (1.63)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">49 (0.97)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">11 (1.63)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">48 (0.97)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><sup>a</sup> – <italic>p</italic>-value calculated using the nonparametric Mann-Whitney U test; <sup>b</sup> – <italic>p</italic>-value calculated using the Chi-squared test; the <italic>p</italic>-value is the probability of rejecting a true null hypothesis. The probability value below which the null hypothesis is rejected is called the significance level <italic>α</italic>. The value <inline-formula id="j_info1198_ineq_012"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap id="j_info1198_tab_002">
<label>Table 2</label>
<caption>
<p>One variable binary logistic regression analysis for the identification of clinically important factors for ischemic heart disease.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Variable</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coef.</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">OR (95% CI)</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">AIC</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">AUC</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><italic>p</italic>-value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Systolic blood pressure (SBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.022</td>
<td style="vertical-align: top; text-align: right">1.022 (1.018; 1.026)</td>
<td style="vertical-align: top; text-align: right">3751</td>
<td style="vertical-align: top; text-align: right">0.615</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diastolic blood pressure (DBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.032</td>
<td style="vertical-align: top; text-align: right">1.032 (1.025; 1.039)</td>
<td style="vertical-align: top; text-align: right">3788</td>
<td style="vertical-align: top; text-align: right">0.596</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – triceps (STT), mm</td>
<td style="vertical-align: top; text-align: right">0.029</td>
<td style="vertical-align: top; text-align: right">1.029 (1.011; 1.047)</td>
<td style="vertical-align: top; text-align: right">3864</td>
<td style="vertical-align: top; text-align: right">0.534</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – scapula (STS), mm</td>
<td style="vertical-align: top; text-align: right">0.039</td>
<td style="vertical-align: top; text-align: right">1.040 (1.027; 1.053)</td>
<td style="vertical-align: top; text-align: right">3836</td>
<td style="vertical-align: top; text-align: right">0.576</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Serum cholesterol (Cholesterol), mmol/L</td>
<td style="vertical-align: top; text-align: right">0.101</td>
<td style="vertical-align: top; text-align: right">1.107 (1.028; 1.191)</td>
<td style="vertical-align: top; text-align: right">3867</td>
<td style="vertical-align: top; text-align: right">0.530</td>
<td style="vertical-align: top; text-align: right">0.007</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Glucose level after 2-hour load</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(Glucose), mmol/L</td>
<td style="vertical-align: top; text-align: right">0.036</td>
<td style="vertical-align: top; text-align: right">1.037 (1.006; 1.068)</td>
<td style="vertical-align: top; text-align: right">3869</td>
<td style="vertical-align: top; text-align: right">0.532</td>
<td style="vertical-align: top; text-align: right">0.017</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother alive (MA)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MA = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.284</td>
<td style="vertical-align: top; text-align: right">0.752 (0.634; 0.892)</td>
<td style="vertical-align: top; text-align: right">3865</td>
<td style="vertical-align: top; text-align: right">0.536</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MA = <italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">−0.279</td>
<td style="vertical-align: top; text-align: right">0.756 (0.226; 1.896)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.597</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother’s myocardial infarction (MMI)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MMI = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.512</td>
<td style="vertical-align: top; text-align: right">1.669 (1.063; 2.521)</td>
<td style="vertical-align: top; text-align: right">3871</td>
<td style="vertical-align: top; text-align: right">0.509</td>
<td style="vertical-align: top; text-align: right">0.012</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MMI = <italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">0.035</td>
<td style="vertical-align: top; text-align: right">1.035 (0.599; 1.677)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.894</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of brothers and sisters</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">at present (NBSP)</td>
<td style="vertical-align: top; text-align: right">−0.069</td>
<td style="vertical-align: top; text-align: right">0.933 (0.893; 0.973)</td>
<td style="vertical-align: top; text-align: right">3864</td>
<td style="vertical-align: top; text-align: right">0.542</td>
<td style="vertical-align: top; text-align: right">0.002</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Increased blood pressure (IBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">IBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.757</td>
<td style="vertical-align: top; text-align: right">2.131 (1.771; 2.558)</td>
<td style="vertical-align: top; text-align: right">3814</td>
<td style="vertical-align: top; text-align: right">0.570</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Blood pressure-lowering medicine (MLBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MLBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">1.185</td>
<td style="vertical-align: top; text-align: right">3.271 (2.568; 4.140)</td>
<td style="vertical-align: top; text-align: right">3793</td>
<td style="vertical-align: top; text-align: right">0.557</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Last intake of medicine</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(LMWA), weeks ago</td>
<td style="vertical-align: top; text-align: right">0.092</td>
<td style="vertical-align: top; text-align: right">1.096 (1.053; 1.139)</td>
<td style="vertical-align: top; text-align: right">3856</td>
<td style="vertical-align: top; text-align: right">0.533</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability group (Disability)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">1.142</td>
<td style="vertical-align: top; text-align: right">3.134 (2.366; 4.113)</td>
<td style="vertical-align: top; text-align: right">3819</td>
<td style="vertical-align: top; text-align: right">0.540</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking habits (Smoking)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking = <italic>Not every day</italic></td>
<td style="vertical-align: top; text-align: right">0.045</td>
<td style="vertical-align: top; text-align: right">1.046 (0.576; 1.772)</td>
<td style="vertical-align: top; text-align: right">3872</td>
<td style="vertical-align: top; text-align: right">0.521</td>
<td style="vertical-align: top; text-align: right">0.874</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking = <italic>Regular smokers and quitters</italic></td>
<td style="vertical-align: top; text-align: right">0.199</td>
<td style="vertical-align: top; text-align: right">1.221 (1.011; 1.481)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.040</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol drinking (Alcohol)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.716</td>
<td style="vertical-align: top; text-align: right">0.489 (0.383; 0.629)</td>
<td style="vertical-align: top; text-align: right">3846</td>
<td style="vertical-align: top; text-align: right">0.534</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of working days per week (NWDPW)</td>
<td style="vertical-align: top; text-align: right">−0.252</td>
<td style="vertical-align: top; text-align: right">0.777 (0.726; 0.834)</td>
<td style="vertical-align: top; text-align: right">3831</td>
<td style="vertical-align: top; text-align: right">0.533</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in summer (WSHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">0.019</td>
<td style="vertical-align: top; text-align: right">1.019 (1.006; 1.032)</td>
<td style="vertical-align: top; text-align: right">3866</td>
<td style="vertical-align: top; text-align: right">0.535</td>
<td style="vertical-align: top; text-align: right">0.004</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in winter (WWHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">0.021</td>
<td style="vertical-align: top; text-align: right">1.021 (1.006; 1.035)</td>
<td style="vertical-align: top; text-align: right">3866</td>
<td style="vertical-align: top; text-align: right">0.537</td>
<td style="vertical-align: top; text-align: right">0.004</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Physical activity in summer</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(PASHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">−0.008</td>
<td style="vertical-align: top; text-align: right">0.992 (0.981; 1.002)</td>
<td style="vertical-align: top; text-align: right">3872</td>
<td style="vertical-align: top; text-align: right">0.522</td>
<td style="vertical-align: top; text-align: right">0.110</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">BMI, kg/m<inline-formula id="j_info1198_ineq_013"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: right">0.076</td>
<td style="vertical-align: top; text-align: right">1.079 (1.056; 1.103)</td>
<td style="vertical-align: top; text-align: right">3828</td>
<td style="vertical-align: top; text-align: right">0.582</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Age, years</td>
<td style="vertical-align: top; text-align: right">0.070</td>
<td style="vertical-align: top; text-align: right">1.073 (1.056; 1.089)</td>
<td style="vertical-align: top; text-align: right">3791</td>
<td style="vertical-align: top; text-align: right">0.611</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Intermittent claudication (Claudication)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Claudication = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">1.257</td>
<td style="vertical-align: top; text-align: right">3.515 (1.896; 6.226)</td>
<td style="vertical-align: top; text-align: right">3860</td>
<td style="vertical-align: top; text-align: right">0.509</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diabetes</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Diabetes = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.521</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.683 (0.800; 3.199)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">3872</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.503</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.136</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Coef – coefficient estimate of the binary logistic regression; OR – odds ratio; AIC – Akaike information criterion; AUC – area under the ROC curve; the <italic>p</italic>-value is the probability of rejecting a true null hypothesis. The probability value below which the null hypothesis is rejected is called the significance level <italic>α</italic>. The value <inline-formula id="j_info1198_ineq_014"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used.</p>
</table-wrap-foot>
</table-wrap>
<p>A binary logistic regression model was used to identify risk factors related to IHD and CVD. From a set of variables (Tables <xref rid="j_info1198_tab_002">2</xref>, <xref rid="j_info1198_tab_004">4</xref>), the following variables remained as independent variables in the final model (multiple analysis) (Tables <xref rid="j_info1198_tab_003">3</xref>, <xref rid="j_info1198_tab_005">5</xref>): systolic blood pressure (SBP), the number of brothers and sisters at present (NBSP), usage of blood pressure-lowering medicines (MLBP), disability group (Disability), alcohol drinking (Alcohol), body mass index (BMI), the patient’s age (Age), and intermittent claudication (Claudication) for IHD, and systolic blood pressure (SBP), serum cholesterol (Cholesterol), number of brothers and sisters at present (NBSP), usage of blood pressure-lowering medicines (MLBP), disability group (Disability), alcohol drinking (Alcohol), smoking habits (Smoking), number of working days per week (NWDPW), time for walking in winter (WWHPW), body mass index (BMI), and the patient’s age (Age) for CVD.</p>
<p>Taking blood pressure-lowering medicines (MLBP), disability, intermittent claudication, regular smoking, and higher values of BMI, SBP, age, serum cholesterol, and walking time in winter were associated with a higher probability of IHD or CVD (equations (<xref rid="j_info1198_eq_002">1</xref>) and (<xref rid="j_info1198_eq_003">2</xref>) are the binary logistic regression equations, where <inline-formula id="j_info1198_ineq_015"><alternatives><mml:math><mml:mover accent="false">
<mml:mrow>
<mml:mi mathvariant="italic">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">ˆ</mml:mo></mml:mover></mml:math><tex-math><![CDATA[$\widehat{P}$]]></tex-math></alternatives></inline-formula> is the estimate of probability). However, a higher number of brothers and sisters and alcohol drinking were associated with a lower probability of these diseases. 
<disp-formula id="j_info1198_eq_002">
<label>(1)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right left" columnspacing="0pt">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mo movablelimits="false">ln</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mover accent="false">
<mml:mrow>
<mml:mi mathvariant="italic">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">ˆ</mml:mo></mml:mover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="normal">IHD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mover accent="false">
<mml:mrow>
<mml:mi mathvariant="italic">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">ˆ</mml:mo></mml:mover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="normal">IHD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">no</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo>=</mml:mo>
</mml:mtd>
<mml:mtd class="align-even">
<mml:mo>−</mml:mo>
<mml:mn>7.260</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>0.012</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">SBP</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>0.065</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">NBSP</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>+</mml:mo>
<mml:mn>0.847</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">MLBP</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>0.673</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Disability</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>−</mml:mo>
<mml:mn>0.461</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Alcohol</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>0.046</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">BMI</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>+</mml:mo>
<mml:mn>0.050</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Age</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>0.805</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Claudication</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{aligned}{}\ln \frac{\widehat{P}(\mathrm{IHD}=\mathit{yes})}{\widehat{P}(\mathrm{IHD}=\mathit{no})}=& -7.260+0.012\times \mathrm{SBP}-0.065\times \mathrm{NBSP}\\ {} & +0.847\times \mathrm{MLBP}(\mathit{yes})+0.673\times \mathrm{Disability}(\mathit{yes})\\ {} & -0.461\times \mathrm{Alcohol}(\mathit{yes})+0.046\times \mathrm{BMI}\\ {} & +0.050\times \mathrm{Age}+0.805\times \mathrm{Claudication},\end{aligned}\]]]></tex-math></alternatives>
</disp-formula> 
<disp-formula id="j_info1198_eq_003">
<label>(2)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right left" columnspacing="0pt">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mo movablelimits="false">ln</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mover accent="false">
<mml:mrow>
<mml:mi mathvariant="italic">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">ˆ</mml:mo></mml:mover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="normal">CVD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mover accent="false">
<mml:mrow>
<mml:mi mathvariant="italic">P</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">ˆ</mml:mo></mml:mover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="normal">CVD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="normal">no</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo>=</mml:mo>
</mml:mtd>
<mml:mtd class="align-even">
<mml:mo>−</mml:mo>
<mml:mn>7.364</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>0.011</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">SBP</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>0.055</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">NBSP</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>+</mml:mo>
<mml:mn>0.710</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">MLBP</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>0.830</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Disability</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>+</mml:mo>
<mml:mn>0.237</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Smoking</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>−</mml:mo>
<mml:mn>0.618</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Alcohol</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>−</mml:mo>
<mml:mn>0.097</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">NWDPW</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>0.016</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">WWHPW</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd"/>
<mml:mtd class="align-even">
<mml:mo>+</mml:mo>
<mml:mn>0.051</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">BMI</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>0.531</mml:mn>
<mml:mo>×</mml:mo>
<mml:mi mathvariant="normal">Age</mml:mi>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{aligned}{}\ln \frac{\widehat{P}(\mathrm{CVD}=\mathit{yes})}{\widehat{P}(\mathrm{CVD}=\mathrm{no})}=& -7.364+0.011\times \mathrm{SBP}-0.055\times \mathrm{NBSP}\\ {} & +0.710\times \mathrm{MLBP}(\mathit{yes})+0.830\times \mathrm{Disability}(\mathit{yes})\\ {} & +0.237\times \mathrm{Smoking}(\mathit{yes})-0.618\times \mathrm{Alcohol}(\mathit{yes})\\ {} & -0.097\times \mathrm{NWDPW}+0.016\times \mathrm{WWHPW}\\ {} & +0.051\times \mathrm{BMI}+0.531\times \mathrm{Age}.\end{aligned}\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>Our binary logistic regression models showed the power (IHD – AUC = 0.688 and CVD – AUC = 0.696) to discriminate IHD or CVD in the Lithuanian sample of middle-aged men (Tables <xref rid="j_info1198_tab_003">3</xref>, <xref rid="j_info1198_tab_005">5</xref>).</p>
<p>The conditional inference tree method, implemented in the R function <italic>ctree</italic> of the package <italic>party</italic>, was used to build a decision tree. This method performs variable selection for tree splitting based on a statistical criterion. A typical significance level value <inline-formula id="j_info1198_ineq_016"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used.</p>
<table-wrap id="j_info1198_tab_003">
<label>Table 3</label>
<caption>
<p>Multiple binary logistic regression analysis for the identification of clinically important factors for ischemic heart disease.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Variable</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coef.</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">OR</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Lower</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Upper</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><italic>p</italic>-value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Systolic blood pressure (SBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.012</td>
<td style="vertical-align: top; text-align: right">1.012</td>
<td style="vertical-align: top; text-align: right">1.008</td>
<td style="vertical-align: top; text-align: right">1.017</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of brothers and sisters at present (NBSP)</td>
<td style="vertical-align: top; text-align: right">−0.065</td>
<td style="vertical-align: top; text-align: right">0.937</td>
<td style="vertical-align: top; text-align: right">0.896</td>
<td style="vertical-align: top; text-align: right">0.979</td>
<td style="vertical-align: top; text-align: right">0.004</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Blood pressure-lowering medicine (MLBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MLBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.847</td>
<td style="vertical-align: top; text-align: right">2.333</td>
<td style="vertical-align: top; text-align: right">1.611</td>
<td style="vertical-align: top; text-align: right">3.346</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability group (Disability)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.673</td>
<td style="vertical-align: top; text-align: right">1.959</td>
<td style="vertical-align: top; text-align: right">1.394</td>
<td style="vertical-align: top; text-align: right">2.722</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol drinking (Alcohol)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.461</td>
<td style="vertical-align: top; text-align: right">0.631</td>
<td style="vertical-align: top; text-align: right">0.482</td>
<td style="vertical-align: top; text-align: right">0.834</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">BMI, kg/m<inline-formula id="j_info1198_ineq_017"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: right">0.046</td>
<td style="vertical-align: top; text-align: right">1.047</td>
<td style="vertical-align: top; text-align: right">1.023</td>
<td style="vertical-align: top; text-align: right">1.071</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Age, years</td>
<td style="vertical-align: top; text-align: right">0.050</td>
<td style="vertical-align: top; text-align: right">1.051</td>
<td style="vertical-align: top; text-align: right">1.035</td>
<td style="vertical-align: top; text-align: right">1.069</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Intermittent claudication (Claudication)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Claudication = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.805</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">2.236</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.150</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">4.144</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.013</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Coef – coefficient estimate of the binary logistic regression; OR – odds ratio; Lower – lower limit of the 95% confidence interval for the odds ratio; Upper – upper limit of the 95% confidence interval for the odds ratio; the <italic>p</italic>-value is the probability of rejecting a true null hypothesis. The probability value below which the null hypothesis is rejected is called the significance level <italic>α</italic>. The value <inline-formula id="j_info1198_ineq_018"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used. Akaike information criterion AIC = 3602. Area under the ROC curve AUC = 0.68751.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap id="j_info1198_tab_004">
<label>Table 4</label>
<caption>
<p>One variable binary logistic regression analysis for the identification of clinically important factors for cardiovascular disease.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Variable</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coef.</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">OR (95% CI)</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">AIC</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">AUC</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><italic>p</italic>-value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Systolic blood pressure (SBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.021</td>
<td style="vertical-align: top; text-align: right">1.021 (1.017; 1.025)</td>
<td style="vertical-align: top; text-align: right">4008</td>
<td style="vertical-align: top; text-align: right">0.609</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diastolic blood pressure (DBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.030</td>
<td style="vertical-align: top; text-align: right">1.031 (1.024; 1.037)</td>
<td style="vertical-align: top; text-align: right">4044</td>
<td style="vertical-align: top; text-align: right">0.594</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – triceps (STT), mm</td>
<td style="vertical-align: top; text-align: right">0.029</td>
<td style="vertical-align: top; text-align: right">1.029 (1.012; 1.046)</td>
<td style="vertical-align: top; text-align: right">4117</td>
<td style="vertical-align: top; text-align: right">0.534</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Skinfold thickness – scapula (STS), mm</td>
<td style="vertical-align: top; text-align: right">0.038</td>
<td style="vertical-align: top; text-align: right">1.039 (1.026; 1.051)</td>
<td style="vertical-align: top; text-align: right">4090</td>
<td style="vertical-align: top; text-align: right">0.575</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Serum cholesterol (Cholesterol), mmol/L</td>
<td style="vertical-align: top; text-align: right">0.124</td>
<td style="vertical-align: top; text-align: right">1.132 (1.055; 1.214)</td>
<td style="vertical-align: top; text-align: right">4116</td>
<td style="vertical-align: top; text-align: right">0.536</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Glucose level after 2-hour load</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(Glucose), mmol/L</td>
<td style="vertical-align: top; text-align: right">0.025</td>
<td style="vertical-align: top; text-align: right">1.025 (0.996; 1.055)</td>
<td style="vertical-align: top; text-align: right">4125</td>
<td style="vertical-align: top; text-align: right">0.521</td>
<td style="vertical-align: top; text-align: right">0.093</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother alive (MA)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MA = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.296</td>
<td style="vertical-align: top; text-align: right">0.744 (0.631; 0.875)</td>
<td style="vertical-align: top; text-align: right">4117</td>
<td style="vertical-align: top; text-align: right">0.537</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MA = <italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">−0.394</td>
<td style="vertical-align: top; text-align: right">0.674 (0.201; 1.689)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.456</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Mother myocardial infarction (MMI)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MMI = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.490</td>
<td style="vertical-align: top; text-align: right">1.633 (1.056; 2.440)</td>
<td style="vertical-align: top; text-align: right">4125</td>
<td style="vertical-align: top; text-align: right">0.508</td>
<td style="vertical-align: top; text-align: right">0.962</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MMI = <italic>I don’t know</italic></td>
<td style="vertical-align: top; text-align: right">−0.012</td>
<td style="vertical-align: top; text-align: right">0.988 (0.581; 1.583)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.962</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of brothers and sisters at present</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(NBSP)</td>
<td style="vertical-align: top; text-align: right">−0.059</td>
<td style="vertical-align: top; text-align: right">0.942 (0.904; 0.981)</td>
<td style="vertical-align: top; text-align: right">4120</td>
<td style="vertical-align: top; text-align: right">0.538</td>
<td style="vertical-align: top; text-align: right">0.004</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Increased blood pressure (IBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">IBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.752</td>
<td style="vertical-align: top; text-align: right">2.120 (1.774; 2.526)</td>
<td style="vertical-align: top; text-align: right">4064</td>
<td style="vertical-align: top; text-align: right">0.569</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Blood pressure-lowering medicine (MLBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MLBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">1.227</td>
<td style="vertical-align: top; text-align: right">3.409 (2.670; 4.283)</td>
<td style="vertical-align: top; text-align: right">4035</td>
<td style="vertical-align: top; text-align: right">0.559</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Last intake of medicine</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(LMWA) weeks ago</td>
<td style="vertical-align: top; text-align: right">0.104</td>
<td style="vertical-align: top; text-align: right">1.109 (1.067; 1.151)</td>
<td style="vertical-align: top; text-align: right">4103</td>
<td style="vertical-align: top; text-align: right">0.536</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability group (Disability)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">1.318</td>
<td style="vertical-align: top; text-align: right">3.736 (2.867; 4.838)</td>
<td style="vertical-align: top; text-align: right">4045</td>
<td style="vertical-align: top; text-align: right">0.548</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking habits (Smoking)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking = <italic>Not every day</italic></td>
<td style="vertical-align: top; text-align: right">0.037</td>
<td style="vertical-align: top; text-align: right">1.038 (0.583; 1.734)</td>
<td style="vertical-align: top; text-align: right">4123</td>
<td style="vertical-align: top; text-align: right">0.525</td>
<td style="vertical-align: top; text-align: right">0.893</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking = <italic>Regular smokers and quitters</italic></td>
<td style="vertical-align: top; text-align: right">0.241</td>
<td style="vertical-align: top; text-align: right">1.272 (1.061; 1.533)</td>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right"/>
<td style="vertical-align: top; text-align: right">0.013</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol drinking (Alcohol)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.844</td>
<td style="vertical-align: top; text-align: right">0.430 (0.342; 0.545)</td>
<td style="vertical-align: top; text-align: right">4084</td>
<td style="vertical-align: top; text-align: right">0.541</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of working days (NWDPW), days/week</td>
<td style="vertical-align: top; text-align: right">−0.285</td>
<td style="vertical-align: top; text-align: right">0.752 (0.704; 0.804)</td>
<td style="vertical-align: top; text-align: right">4066</td>
<td style="vertical-align: top; text-align: right">0.541</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in summer (WSHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">0.020</td>
<td style="vertical-align: top; text-align: right">1.020 (1.008; 1.033)</td>
<td style="vertical-align: top; text-align: right">4118</td>
<td style="vertical-align: top; text-align: right">0.537</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in winter (WWHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">0.023</td>
<td style="vertical-align: top; text-align: right">1.024 (1.010; 1.038)</td>
<td style="vertical-align: top; text-align: right">4117</td>
<td style="vertical-align: top; text-align: right">0.540</td>
<td style="vertical-align: top; text-align: right">0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Physical activity in summer</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">(PASHPW), hours/week</td>
<td style="vertical-align: top; text-align: right">−0.010</td>
<td style="vertical-align: top; text-align: right">0.990 (0.980; 1.000)</td>
<td style="vertical-align: top; text-align: right">4124</td>
<td style="vertical-align: top; text-align: right">0.526</td>
<td style="vertical-align: top; text-align: right">0.046</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">BMI, kg/m<inline-formula id="j_info1198_ineq_019"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: right">0.075</td>
<td style="vertical-align: top; text-align: right">1.078 (1.056; 1.100)</td>
<td style="vertical-align: top; text-align: right">4079</td>
<td style="vertical-align: top; text-align: right">0.581</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Age, years</td>
<td style="vertical-align: top; text-align: right">0.072</td>
<td style="vertical-align: top; text-align: right">1.075 (1.060; 1.091)</td>
<td style="vertical-align: top; text-align: right">4032</td>
<td style="vertical-align: top; text-align: right">0.615</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Diabetes</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Diabetes = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.528</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.695 (0.832; 3.158)</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">4126</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.503</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.117</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Coef – coefficient estimate of the binary logistic regression; OR – odds ratio; AIC – Akaike information criterion; AUC – area under the ROC curve; the <italic>p</italic>-value is the probability of rejecting a true null hypothesis. The probability value below which the null hypothesis is rejected is called the significance level <italic>α</italic>. The value <inline-formula id="j_info1198_ineq_020"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap id="j_info1198_tab_005">
<label>Table 5</label>
<caption>
<p>Multiple binary logistic regression analysis for the identification of clinically important factors for cardiovascular disease.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Variable</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coef.</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">OR</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Lower</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Upper</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><italic>p</italic>-value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Systolic blood pressure (SBP), mm Hg</td>
<td style="vertical-align: top; text-align: right">0.011</td>
<td style="vertical-align: top; text-align: right">1.011</td>
<td style="vertical-align: top; text-align: right">1.007</td>
<td style="vertical-align: top; text-align: right">1.016</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Serum cholesterol (Cholesterol), mmol/L</td>
<td style="vertical-align: top; text-align: right">0.112</td>
<td style="vertical-align: top; text-align: right">1.119</td>
<td style="vertical-align: top; text-align: right">1.039</td>
<td style="vertical-align: top; text-align: right">1.204</td>
<td style="vertical-align: top; text-align: right">0.003</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of brothers and sisters at present (NBSP)</td>
<td style="vertical-align: top; text-align: right">−0.055</td>
<td style="vertical-align: top; text-align: right">0.947</td>
<td style="vertical-align: top; text-align: right">0.907</td>
<td style="vertical-align: top; text-align: right">0.987</td>
<td style="vertical-align: top; text-align: right">0.011</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Blood pressure-lowering medicine (MLBP)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">MLBP = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.7103</td>
<td style="vertical-align: top; text-align: right">2.035</td>
<td style="vertical-align: top; text-align: right">1.554</td>
<td style="vertical-align: top; text-align: right">2.648</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability group (Disability)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Disability = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">0.830</td>
<td style="vertical-align: top; text-align: right">2.294</td>
<td style="vertical-align: top; text-align: right">1.665</td>
<td style="vertical-align: top; text-align: right">3.133</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking habits (Smoking)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Smoking = <italic>Regular smokers and quitters</italic></td>
<td style="vertical-align: top; text-align: right">0.237</td>
<td style="vertical-align: top; text-align: right">1.267</td>
<td style="vertical-align: top; text-align: right">1.047</td>
<td style="vertical-align: top; text-align: right">1.540</td>
<td style="vertical-align: top; text-align: right">0.016</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol drinking (Alcohol)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Alcohol = <italic>Yes</italic></td>
<td style="vertical-align: top; text-align: right">−0.618</td>
<td style="vertical-align: top; text-align: right">0.539</td>
<td style="vertical-align: top; text-align: right">0.417</td>
<td style="vertical-align: top; text-align: right">0.702</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of working days per week (NWDPW)</td>
<td style="vertical-align: top; text-align: right">−0.097</td>
<td style="vertical-align: top; text-align: right">0.908</td>
<td style="vertical-align: top; text-align: right">0.839</td>
<td style="vertical-align: top; text-align: right">0.984</td>
<td style="vertical-align: top; text-align: right">0.018</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Walking in winter (WWHPW), hours per week</td>
<td style="vertical-align: top; text-align: right">0.016</td>
<td style="vertical-align: top; text-align: right">1.016</td>
<td style="vertical-align: top; text-align: right">1.000</td>
<td style="vertical-align: top; text-align: right">1.031</td>
<td style="vertical-align: top; text-align: right">0.036</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">BMI, kg/m<inline-formula id="j_info1198_ineq_021"><alternatives><mml:math>
<mml:msup>
<mml:mrow/>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${^{2}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: right">0.051</td>
<td style="vertical-align: top; text-align: right">1.052</td>
<td style="vertical-align: top; text-align: right">1.029</td>
<td style="vertical-align: top; text-align: right">1.076</td>
<td style="vertical-align: top; text-align: right">&lt;0.001</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Age, years</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">0.053</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.055</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.038</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">1.071</td>
<td style="vertical-align: top; text-align: right; border-bottom: solid thin">&lt;0.001</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Coef – coefficient estimate of the binary logistic regression; OR – odds ratio; Lower – lower limit 95% confidence interval for odds ratio; Upper – upper limit 95% confidence interval for odds ratio; <italic>p</italic>-value is probability to reject the true null hypothesis. The probability value below which the null hypothesis is rejected is called significance level <italic>α</italic>. The value <inline-formula id="j_info1198_ineq_022"><alternatives><mml:math>
<mml:mi mathvariant="italic">α</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.05</mml:mn></mml:math><tex-math><![CDATA[$\alpha =0.05$]]></tex-math></alternatives></inline-formula> was used. Akaike information criterion AIC = 3802. Area under the ROC curve AUC = 0.69632.</p> 
</table-wrap-foot>
</table-wrap>
<fig id="j_info1198_fig_001">
<label>Fig. 1</label>
<caption>
<p>Decision tree model for the prediction of ischemic heart disease.</p>
</caption>
<graphic xlink:href="info1198_g001.jpg"/>
</fig>
<table-wrap id="j_info1198_tab_006">
<label>Table 6</label>
<caption>
<p>Errors of binary logistic regression and decision tree.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin"/>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">In-Sample</td>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">10-fold CV</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">MAE</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">EER</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">AUC</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">MAE</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">EER</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">AUC</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td colspan="6" style="vertical-align: top; text-align: center">IHD</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Logistic regression</td>
<td style="vertical-align: top; text-align: left">0.18080</td>
<td style="vertical-align: top; text-align: left">0.36993</td>
<td style="vertical-align: top; text-align: left">0.68751</td>
<td style="vertical-align: top; text-align: left">0.18213</td>
<td style="vertical-align: top; text-align: left">0.38070</td>
<td style="vertical-align: top; text-align: left">0.67438</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Decision tree</td>
<td style="vertical-align: top; text-align: left">0.18445</td>
<td style="vertical-align: top; text-align: left">0.38424</td>
<td style="vertical-align: top; text-align: left">0.66725</td>
<td style="vertical-align: top; text-align: left">0.18592</td>
<td style="vertical-align: top; text-align: left">0.40394</td>
<td style="vertical-align: top; text-align: left">0.63402</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td colspan="6" style="vertical-align: top; text-align: center">CVD</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Logistic regression</td>
<td style="vertical-align: top; text-align: left">0.19411</td>
<td style="vertical-align: top; text-align: left">0.36650</td>
<td style="vertical-align: top; text-align: left">0.69632</td>
<td style="vertical-align: top; text-align: left">0.19514</td>
<td style="vertical-align: top; text-align: left">0.37392</td>
<td style="vertical-align: top; text-align: left">0.68277</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Decision tree</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.19686</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.35762</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.69233</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.20089</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.38991</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.64624</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>MAE – mean absolute error of disease probability (smaller is better); EER – equal error rate (smaller is better); AUC – area under the ROC curve (larger is better).</p>
</table-wrap-foot>
</table-wrap>
<fig id="j_info1198_fig_002">
<label>Fig. 2</label>
<caption>
<p>Decision tree model for the prediction of cardiovascular disease.</p>
</caption>
<graphic xlink:href="info1198_g002.jpg"/>
</fig>
<p>Figures <xref rid="j_info1198_fig_001">1</xref> and <xref rid="j_info1198_fig_002">2</xref> show decision tree models for IHD (the number of nodes is 13) and CVD (the number of nodes is 21). For IHD and CVD, the root node performs branching based on SBP. It has two branches: ⩽172 mmHg and &gt;172 mmHg. Terminal nodes 5, 7, 8, 10, 11, 12, and 13 are for IHD with the probability of the winning class: 0.155, 0.058, 0.089, 0.116, 0.219, 0.237, and 0.319, respectively. For CVD, the terminal nodes are 6, 9, 10, 11, 12, 14, 15, 17, 18, 20, and 21, and the probability of the winning class is 0.143, 0.043, 0.082, 0.092, 0.175, 0.108, 0.18, 0.531, 0.261, 0.607, and 0.304, respectively. Thus, the highest probability (0.319) to have IHD is when a person has high SBP (more than 172 mmHg; nodes 1 and 13). If a person’s data corresponds to nodes 1, 19, and 20, he or she has the highest probability (0.607) to have CVD. The lowest probability (0.058) to have IHD is indicated by nodes 1, 2, 3, 4, 6, and 7, and the lowest probability to have CVD – by the presence of nodes 1, 2, 3, 4, 5, 7, 8, and 9 (the probability is 0.043).</p>
<p>The decision tree (Fig. <xref rid="j_info1198_fig_001">1</xref>) can be rewritten into an equivalent set of IF-THEN decision rules (Han <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_007">2012</xref>): 
<disp-formula id="j_info1198_eq_004">
<alternatives><mml:math display="block">
<mml:mtable equalrows="false" equalcolumns="false" columnalign="left">
<mml:mtr>
<mml:mtd class="array">
<mml:mtext>IF</mml:mtext>
<mml:mi mathvariant="italic">SBP</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mn>172</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">Disability</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">Age</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mn>52.9</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">MLBP</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mspace width="2em"/>
<mml:mtext>THEN</mml:mtext>
<mml:mi mathvariant="italic">Probability</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">IHD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>0.155</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mtext>IF</mml:mtext>
<mml:mi mathvariant="italic">SBP</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mn>172</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">Disability</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">Age</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mn>52.9</mml:mn>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">MLBP</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mphantom>
<mml:mtext>IF</mml:mtext></mml:mphantom>
<mml:mtext>AND</mml:mtext>
<mml:mi mathvariant="italic">Age</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mn>48.3</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mspace width="2em"/>
<mml:mtext>THEN</mml:mtext>
<mml:mi mathvariant="italic">Probability</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">IHD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>0.058</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mo stretchy="false">⋯</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mtext>IF</mml:mtext>
<mml:mi mathvariant="italic">SBP</mml:mi>
<mml:mo mathvariant="normal">&gt;</mml:mo>
<mml:mn>172</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mspace width="2em"/>
<mml:mtext>THEN</mml:mtext>
<mml:mi mathvariant="italic">Probability</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">IHD</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">yes</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>0.319.</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\text{IF}\mathit{SBP}\leqslant 172\text{AND}\mathit{Disability}=1\text{AND}\mathit{Age}\leqslant 52.9\text{AND}\mathit{MLBP}=2\\ {} \hspace{2em}\text{THEN}\mathit{Probability}(\mathit{IHD}=\mathit{yes})=0.155\\ {} \text{IF}\mathit{SBP}\leqslant 172\text{AND}\mathit{Disability}=1\text{AND}\mathit{Age}\leqslant 52.9\text{AND}\mathit{MLBP}=1\\ {} \phantom{\text{IF}}\text{AND}\mathit{Age}\leqslant 48.3\\ {} \hspace{2em}\text{THEN}\mathit{Probability}(\mathit{IHD}=\mathit{yes})=0.058\\ {} \cdots \\ {} \text{IF}\mathit{SBP}>172\\ {} \hspace{2em}\text{THEN}\mathit{Probability}(\mathit{IHD}=\mathit{yes})=0.319.\end{array}\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>The probability of IHD is less than 0.5 for all terminal nodes of the decision tree. We present probabilities of IHD in IF-THEN rules to avoid using strict classification into <italic>IHD=yes</italic> and <italic>IHD=no</italic> classes. We assume that the decision tree visual representation is easier to understand than decision rules for humans, thus we will not present a full set of decision rules. If preferred, the decision rules can be obtained by traversing all decision tree paths from root to terminal nodes.</p>
<p>The major aim of this article was to compare the errors of binary logistic regression and the decision tree and to determine which method was superior (Table <xref rid="j_info1198_tab_006">6</xref>). We found that the 10-fold CV mean absolute error of classification probability in the binary logistic regression (MAE = 0.18213) was lower by only 2.04% compared to the error in the decision tree (MAE = 0.18592) for IHD, and by 2.86% for CVD. Similar results were obtained for in-sample mean absolute errors, where the logistic regression error was 1.40–1.98% lower than the decision tree error. The difference between equal error rates for both methods, LR and DT, was small: 2.48–5.75%. In all cases, errors for LR were lower, except for the in-sample CVD case, where EER for DT is 2.48% smaller than for LR.</p>
<p>The difference between in-sample and 10-fold CV errors was also small – 0.79–2.00%. This indicates that the selected methods were not overfitting.</p>
</sec>
<sec id="j_info1198_s_009">
<label>4</label>
<title>Discussion</title>
<p>It is important not only to consider the risks of developing CVD, but also to choose the appropriate statistical method that allows for the closest assessment of these risks and produces the fewest errors. In this study, the most popular binary logistic regression model was applied to assess the impact of various factors on the risk of CVD, and the results were compared with those of the decision tree model. These methods were chosen because these two techniques have often been used for very similar tasks (Long <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_015">1993</xref>). The decision tree is easily interpretable by the consumer. In addition, decision tree models are robust to outliers, do not depend on distribution assumptions or parametric dependencies, and can easily handle missing data (Song and Lu, <xref ref-type="bibr" rid="j_info1198_ref_022">2015</xref>). Node condition testing and tree traversing from root to leaves can be performed without the need for mathematical calculations.</p>
<p>The decision tree also provides insight and understanding into the predictive structure of the data (Breiman <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_002">1984</xref>). The root node test condition variable is the most influential variable in the classification of observations. The other nodes contain the most influential variables for subsets of the data.</p>
<p>The results produced by the logistic regression model are a little more complicated to read and understand as compared to those produced by the decision tree. Another reason is the scarcity of scientific studies that use decision trees for CVD data analysis. Soni <italic>et al.</italic> indicated that, compared to other classification methods such as a neural network or Naive Bayes, the decision tree algorithm was the most accurate in CVD prediction (Soni <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_023">2011</xref>). Many popular algorithms have selection bias towards covariates with many possible splits (Hothorn <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1198_ref_008">2006</xref>).</p>
<p>There are not many articles in the domain of medicine that compare logistic regression to decision trees. All of them compare logistic regression to the most common decision tree algorithms (based on heuristic criteria): CART, ID3, C4.5, C5.0. In this article, we compare the classical statistical logistic regression method to a decision tree method that is also based on a well-defined statistical theory.</p>
<p>We found that alcohol drinking was associated with a lower probability of IHD or CVD. This was unexpected because the relationship is inverse in most studies. However, this question was relevant earlier as well. Renaud and Lorgeril (<xref ref-type="bibr" rid="j_info1198_ref_018">1992</xref>) presented findings showing that the consumption of alcohol at the level of intake in France (20–30 g per day) can reduce the risk of coronary heart disease (CHD) by at least 40%. The researchers suggested that alcohol may protect from CHD by preventing atherosclerosis through the action of high-density-lipoprotein cholesterol, but serum concentrations of this factor are not higher in France than in other countries.</p>
<p>Mukamal <italic>et al.</italic> (<xref ref-type="bibr" rid="j_info1198_ref_016">2005</xref>) indicated that consuming moderate amounts of alcohol 3 to 4 days per week was associated with a lower relative risk of ischemic stroke (0.68; 95% CI = (0.44; 1.05)).</p>
<p>Fernandez-Scola (<xref ref-type="bibr" rid="j_info1198_ref_003">2015</xref>) presented the results of epidemiological case-control studies and meta-analyses showing a U-type bimodal relationship – i.e. that low-to-moderate alcohol consumption (particularly of wine or beer) was associated with a decrease in cardiovascular events and mortality, compared with abstention.</p>
<p>Gaziano (<xref ref-type="bibr" rid="j_info1198_ref_004">2016</xref>) indicated that alcohol was associated with an increase in HDL cholesterol and a lower risk of diabetes. He stated that this seems to be one important mechanism by which alcohol could lower the risk of heart disease. We have one more possible explanation: if people feel healthy, they allow themselves to drink more alcohol.</p>
</sec>
<sec id="j_info1198_s_010">
<label>5</label>
<title>Conclusion</title>
<p>Both methods, the binary logistic regression and the decision tree, applied for assessing the risk of IHD and CVD in middle-aged men revealed which factors were statistically significant variables to predict these diseases. For the risk of IHD, these factors were the following: systolic blood pressure, the number of brothers and sisters at present, using blood pressure-lowering medicine, disability group, alcohol drinking, body mass index, age, and intermittent claudication. For the risk of CVD, these factors were systolic blood pressure, serum cholesterol level, the number of brothers and sisters at present, using blood pressure-lowering medicine, disability group, alcohol drinking, smoking habits, the number of working days per week, time for walking in the winter, the body mass index, and age.</p>
<p>The binary logistic regression method showed a very slightly lower level of mean absolute errors than the decision tree did (the difference was 2.04% for IHD and 2.86% for CVD), but for consumers, the results of the decision tree are easier to understand and to interpret. Khemphila and Boonjing (<xref ref-type="bibr" rid="j_info1198_ref_012">2010</xref>) presented a similar problem for classifying heart disease patients using the logistic regression and the decision tree. Error rates for these methods (0.22 – for logistic regression and 0.21 – for decision tree) were also similar. Both methods are appropriate for the analysis of data on cardiovascular disease.</p>
</sec>
</body>
<back>
<ref-list id="j_info1198_reflist_001">
<title>References</title>
<ref id="j_info1198_ref_001">
<mixed-citation publication-type="chapter"><string-name><surname>Akaike</surname>, <given-names>H.</given-names></string-name> (<year>1973</year>). <chapter-title>Information theory and an extension of the maximum likelihood principle</chapter-title>. In: <string-name><surname>Petrov</surname>, <given-names>B.N.</given-names></string-name>, <string-name><surname>Csaki</surname>, <given-names>F.</given-names></string-name> (Eds.), <source>Proceedings of the 2nd International Symposium on Information Theory</source>, <conf-loc>Tsahkadsov, Armenia</conf-loc>, pp. <fpage>267</fpage>–<lpage>281</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_002">
<mixed-citation publication-type="book"><string-name><surname>Breiman</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Friedman</surname>, <given-names>J.H.</given-names></string-name>, <string-name><surname>Olshen</surname>, <given-names>R.A.</given-names></string-name>, <string-name><surname>Stone</surname>, <given-names>C.J.</given-names></string-name> (<year>1984</year>). <source>Classification and Regression Trees</source>. <publisher-name>Wadsworth</publisher-name>, <publisher-loc>Belmont, California</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_003">
<mixed-citation publication-type="journal"><string-name><surname>Fernandez-Scola</surname>, <given-names>J.</given-names></string-name> (<year>2015</year>). <article-title>Cardiovascular risks and benefits of moderate and heavy alcohol consumption</article-title>. <source>Nature Reviews Cardiology</source>, <volume>12</volume>, <fpage>576</fpage>–<lpage>587</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_004">
<mixed-citation publication-type="journal"><string-name><surname>Gaziano</surname>, <given-names>J.M.</given-names></string-name> (<year>2016</year>). <article-title>Health alcohol consumption: myth or reality?</article-title> <source>Journal of Hypertension</source>, <volume>34</volume>, <elocation-id>e16</elocation-id>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_005">
<mixed-citation publication-type="journal"><string-name><surname>Geisser</surname>, <given-names>S.</given-names></string-name> (<year>1975</year>). <article-title>The predictive sample reuse method with applications</article-title>. <source>Journal of the American Statistical Association</source>, <volume>70</volume>, <fpage>320</fpage>–<lpage>328</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_006">
<mixed-citation publication-type="book"><string-name><surname>Glasunov</surname>, <given-names>I.S.</given-names></string-name>, <string-name><surname>Dowd</surname>, <given-names>J.E.</given-names></string-name>, <string-name><surname>Baubinienė</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Grabauskas</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Sturmans</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Shuurman</surname>, <given-names>J.H.</given-names></string-name> (<year>1981</year>). <source>The Kaunas Rotterdam Intervention Study</source>. <publisher-name>Elsevier, North Holland Biomedical Press</publisher-name>, <publisher-loc>Amsterdam</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_007">
<mixed-citation publication-type="book"><string-name><surname>Han</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Kamber</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Pei</surname>, <given-names>J.</given-names></string-name> (<year>2012</year>). <source>Data Mining: Concepts and Techniques</source>, <edition>3</edition>rd ed. <publisher-name>Morgan Kaufmann</publisher-name>, <publisher-loc>Massachusetts</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_008">
<mixed-citation publication-type="journal"><string-name><surname>Hothorn</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Hornik</surname>, <given-names>K.</given-names></string-name>, <string-name><surname>Zeileis</surname>, <given-names>A.</given-names></string-name> (<year>2006</year>). <article-title>Unbiased recursive partitioning: a conditional inference framework</article-title>. <source>Journal of Computational and Graphical Statistics</source>, <volume>15</volume>(<issue>3</issue>), <fpage>651</fpage>–<lpage>674</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_009">
<mixed-citation publication-type="journal"><string-name><surname>Huang</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>C.P.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Wefler</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Raftery</surname>, <given-names>A.</given-names></string-name> (<year>1961</year>). <article-title>A stable reagent for the Lieberman-Burchard reaction. Application to rapid serum cholesterol determination</article-title>. <source>Analytical Chemistry</source>, <volume>33</volume>, <fpage>1405</fpage>–<lpage>1407</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_010">
<mixed-citation publication-type="other"><string-name><surname>Jing</surname>, <given-names>J.</given-names></string-name> (2013). <italic>The introduction and application of recursive partitioning methods in organizational science</italic>. PhD thesis, University of Illinois at Urbana-Champaign.</mixed-citation>
</ref>
<ref id="j_info1198_ref_011">
<mixed-citation publication-type="chapter"><string-name><surname>Kerdprasop</surname>, <given-names>N.</given-names></string-name>, <string-name><surname>Kittisak</surname>, <given-names>K.</given-names></string-name> (<year>2011</year>). <chapter-title>Heuristic-based decision tree induction method for noisy data</chapter-title>. In: <string-name><surname>Kim</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Adeli</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Cuzzocrea</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Arslan</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Ma</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Chung</surname>, <given-names>K.</given-names></string-name>, <string-name><surname>Mariyam</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Song</surname>, <given-names>X.</given-names></string-name> (Eds.), <source>Database Theory and Application, Bio-Science and Bio-Technology</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Berlin, Heidelberg</publisher-loc>, pp. <fpage>1</fpage>–<lpage>10</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_012">
<mixed-citation publication-type="chapter"><string-name><surname>Khemphila</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Boonjing</surname>, <given-names>V.</given-names></string-name> (<year>2010</year>). <chapter-title>Comparing performances of logistic regression, decision trees, and neural networks for classifying heart disease patients</chapter-title>. In: <source>2010 International Conference on Computer Information Systems and Industrial Management Applications (CISIM)</source>, pp. <fpage>193</fpage>–<lpage>198</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_013">
<mixed-citation publication-type="journal"><string-name><surname>Kuzmickienė</surname>, <given-names>I.</given-names></string-name>, <string-name><surname>Everatt</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Virvičiūtė</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Tamošiūnas</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Radišauskas</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Reklaitienė</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Milinavičienė</surname>, <given-names>E.</given-names></string-name> (<year>2013</year>). <article-title>Smoking and other risk factors for pancreatic cancer: a cohort study in men in Lithuania</article-title>. <source>Cancer Epidemiology</source>, <volume>37</volume>, <fpage>133</fpage>–<lpage>139</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_014">
<mixed-citation publication-type="other"><collab>Lithuanian Ministry of Health, Health Information Centre of Institute of Hygiene</collab> (2016). Health Statistics of Lithuania 2015. Available: <uri>http://sic.hi.lt/data/la2015.pdf</uri>. Accessed: 28 March 2017.</mixed-citation>
</ref>
<ref id="j_info1198_ref_015">
<mixed-citation publication-type="journal"><string-name><surname>Long</surname>, <given-names>W.J.</given-names></string-name>, <string-name><surname>Griffith</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Selker</surname>, <given-names>H.P.</given-names></string-name>, <string-name><surname>D’Agostino</surname>, <given-names>R.</given-names></string-name> (<year>1993</year>). <article-title>A comparison of logistic regression to decision-tree induction in a medical domain</article-title>. <source>Computers and Biomedical Research</source>, <volume>26</volume>, <fpage>74</fpage>–<lpage>97</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_016">
<mixed-citation publication-type="journal"><string-name><surname>Mukamal</surname>, <given-names>K.J.</given-names></string-name>, <string-name><surname>Ascherio</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Mittleman</surname>, <given-names>M.A.</given-names></string-name>, <string-name><surname>Conigrave</surname>, <given-names>K.M.</given-names></string-name>, <string-name><surname>Camargo</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Kawachi</surname>, <given-names>I.</given-names></string-name>, <string-name><surname>Stampfer</surname>, <given-names>M.J.</given-names></string-name>, <string-name><surname>Willett</surname>, <given-names>W.C.</given-names></string-name>, <string-name><surname>Rimm</surname>, <given-names>E.B.</given-names></string-name> (<year>2005</year>). <article-title>Alcohol and risk for ischemic stroke in men: the role of drinking patterns and usual beverage</article-title>. <source>Annals of Internal Medicine</source>, <volume>142</volume>, <fpage>11</fpage>–<lpage>19</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_017">
<mixed-citation publication-type="book"><string-name><surname>Prineas</surname>, <given-names>R.J.</given-names></string-name>, <string-name><surname>Crow</surname>, <given-names>R.S.</given-names></string-name>, <string-name><surname>Blackburn</surname>, <given-names>H.</given-names></string-name> (<year>1982</year>). <source>The Minnesota Code Manual of Electrocardiographic Findings</source>. <publisher-name>John Wright</publisher-name>, <publisher-loc>Boston</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_018">
<mixed-citation publication-type="journal"><string-name><surname>Renaud</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>de Lorgeril</surname>, <given-names>M.</given-names></string-name> (<year>1992</year>). <article-title>Wine, alcohol, platelets and the French paradox for coronary heart disease</article-title>. <source>The Lancet</source>, <volume>339</volume>(<issue>8808</issue>), <fpage>1523</fpage>–<lpage>1526</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_019">
<mixed-citation publication-type="journal"><string-name><surname>Rėklaitienė</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Tamošiūnas</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Virvičiūtė</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Bacevičienė</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Lukšienė</surname>, <given-names>D.</given-names></string-name> (<year>2012</year>). <article-title>Trends in prevalence, awareness, treatment, and control of hypertension, and the risk of mortality among middle-aged Lithuanian urban population in 1983–2009</article-title>. <source>BMC Cardiovascular Disorders</source>, <volume>12</volume>, <elocation-id>68</elocation-id>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_020">
<mixed-citation publication-type="book"><string-name><surname>Rose</surname>, <given-names>G.A.</given-names></string-name>, <string-name><surname>Blackburn</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Gillum</surname>, <given-names>R.F.</given-names></string-name>, <string-name><surname>Prineas</surname>, <given-names>R.J.</given-names></string-name> (<year>1982</year>). <source>Cardiovascular Survey Methods</source>. <series>WHO Monograph Series. Cardiovascular Disease Unit</series>, Vol. <volume>56</volume>. <publisher-name>World Health Organization</publisher-name>, <publisher-loc>Geneva, Switzerland</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_021">
<mixed-citation publication-type="other"><string-name><surname>Schneider</surname>, <given-names>J.</given-names></string-name> (1997). <italic>Cross Validation</italic>. Available: <ext-link ext-link-type="uri" xlink:href="https://www.cs.cmu.edu/~schneide/tut5/node42.html">https://www.cs.cmu.edu/~schneide/tut5/node42.html</ext-link>. Accessed: 28 March 2017.</mixed-citation>
</ref>
<ref id="j_info1198_ref_022">
<mixed-citation publication-type="journal"><string-name><surname>Song</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Lu</surname>, <given-names>Y.</given-names></string-name> (<year>2015</year>). <article-title>Decision tree methods: application for classification and prediction</article-title>. <source>Shanghai Archives of Psychiatry</source>, <volume>27</volume>(<issue>2</issue>), <fpage>130</fpage>–<lpage>135</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_023">
<mixed-citation publication-type="journal"><string-name><surname>Soni</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Ansari</surname>, <given-names>U.</given-names></string-name>, <string-name><surname>Sharma</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Soni</surname>, <given-names>S.</given-names></string-name> (<year>2011</year>). <article-title>Predictive data mining for medical diagnosis: an overview of heart disease prediction</article-title>. <source>International Journal of Computer Applications</source>, <volume>17</volume>(<issue>8</issue>), <fpage>43</fpage>–<lpage>48</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_024">
<mixed-citation publication-type="journal"><string-name><surname>Stone</surname>, <given-names>M.</given-names></string-name> (<year>1974</year>). <article-title>Cross-validation choice and assessment of statistical predictions</article-title>. <source>Journal of the Royal Statistical Society, Series B</source>, <volume>36</volume>(<issue>2</issue>), <fpage>111</fpage>–<lpage>147</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_025">
<mixed-citation publication-type="journal"><string-name><surname>Strobl</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Malley</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Tutz</surname>, <given-names>G.</given-names></string-name> (<year>2009</year>). <article-title>An introduction to recursive partitioning: rationale, application, and characteristics of classification and regression trees, bagging, and random forests</article-title>. <source>Psychological Methods</source>, <volume>14</volume>(<issue>4</issue>), <fpage>323</fpage>–<lpage>348</lpage>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_026">
<mixed-citation publication-type="journal"><string-name><surname>Tamošiūnas</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Lukšienė</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Bacevičienė</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Bernotienė</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Radišauskas</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Malinauskienė</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Krančiukaitė-Butylkinienė</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Virvičiūtė</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Peasey</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Bobak</surname>, <given-names>M.</given-names></string-name> (<year>2014</year>). <article-title>Health factors and risk of all-cause, cardiovascular, and coronary heart disease mortality: findings from the MONICA and HAPIEE studies in Lithuania</article-title>. <source>PLoS One</source>, <volume>9</volume>(<issue>12</issue>), <elocation-id>e114283</elocation-id>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_027">
<mixed-citation publication-type="other"><collab>WebFOCUS RStat</collab> (2011). <italic>Explanation of the Decision Tree Model</italic>. Available: <ext-link ext-link-type="uri" xlink:href="http://webfocusinfocenter.informationbuilders.com/wfappent/TLs/TL_rstat/source/topic41.htm">http://webfocusinfocenter.informationbuilders.com/wfappent/TLs/TL_rstat/source/topic41.htm</ext-link>. Accessed: 28 March 2017.</mixed-citation>
</ref>
<ref id="j_info1198_ref_028">
<mixed-citation publication-type="book"><string-name><surname>Witten</surname>, <given-names>I.H.</given-names></string-name>, <string-name><surname>Frank</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Hall</surname>, <given-names>M.A.</given-names></string-name> (<year>2011</year>). <source>Data Mining: Practical Machine Learning Tools and Techniques</source>, <edition>3</edition>rd ed. <publisher-name>Morgan Kaufmann</publisher-name>.</mixed-citation>
</ref>
<ref id="j_info1198_ref_029">
<mixed-citation publication-type="journal"><string-name><surname>Zhao</surname>, <given-names>Z.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Qi</surname>, <given-names>Y.</given-names></string-name> (<year>2016</year>). <article-title>Representation of binary feature pooling for detection of insulator strings in infrared images</article-title>. <source>IEEE Transactions on Dielectrics and Electrical Insulation</source>, <volume>23</volume>(<issue>5</issue>), <fpage>2858</fpage>–<lpage>2866</lpage>.</mixed-citation>
</ref>
</ref-list>
</back>
</article>