Merge branch 'IT17181570' into 'master'

Extract the dataset for the probability distribution questions. See merge request !62

Merge branch 'IT17181570' into 'master'
Extract the dataset for the probability distribution questions. See merge request !62
37570988 · Gamage B.G.N.U · 44c3082d · 896cc262 · 37570988 · 37570988
Commit 37570988 authored Jan 27, 2021 by Gamage B.G.N.U
5 changed files
--- a/app/src/main/java/com/elearning/probabilityandstatisticsanalyserandeducator/dataExtraction/ExtractValueForRandomVariable.java
+++ b/app/src/main/java/com/elearning/probabilityandstatisticsanalyserandeducator/dataExtraction/ExtractValueForRandomVariable.java
+package com.elearning.probabilityandstatisticsanalyserandeducator.dataExtraction;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts whole-number tokens from the words of a question.
+ *
+ * <p>Matches accumulate in {@link #randomVariable} across calls on the same instance, in the
+ * order they are found. Use a fresh instance when only one question's values are wanted.
+ */
+class ExtractValueForRandomVariable {
+    /**
+     * Runs of digits, optionally chained by {@code ,}, whitespace, {@code ;} or {@code :}.
+     *
+     * <p>Alternatives are tried left to right and the first one already accepts any bare digit
+     * run, so in practice only comma-chained runs are returned as a single token; other
+     * separators end the match.
+     */
+    private static final Pattern NUMBER_PATTERN = Pattern.compile("\\b(\\d)+(,(\\d)+)*\\b|\\b(\\d)+(\\s(\\d)+)*\\b|\\b(\\d)+(;(\\d)+)*\\b|\\b(\\d)+(:(\\d)+)*\\b");
+
+    /** Every token extracted so far by this instance. */
+    List<String> randomVariable = new ArrayList<String>();
+    /** Fixed-size view of the most recently processed question. */
+    List<String> wordlist = new ArrayList<String>();
+    /** Trimmed form of the last word that was scanned. */
+    String word;
+    /** Never assigned by this class; kept for compatibility with existing readers. */
+    String data;
+
+    /**
+     * Scans each word of {@code question} and appends every numeric token to
+     * {@link #randomVariable}.
+     *
+     * @param question the words of the question; {@code null} is treated as "no words" and
+     *     {@code null} elements are skipped
+     * @return the accumulated token list (the same list instance on every call)
+     */
+    public List<String> InputRandomVariable(String[] question) {
+        if (question == null) {
+            return randomVariable;
+        }
+
+        wordlist = Arrays.asList(question);
+        for (String s : question) {
+            if (s == null) {
+                continue;
+            }
+            word = s.trim();
+
+            Matcher matcher = NUMBER_PATTERN.matcher(word);
+            while (matcher.find()) {
+                // Append rather than insert by index so earlier results keep their position.
+                randomVariable.add(matcher.group().trim());
+            }
+        }
+        return randomVariable;
+    }
+}
--- a/app/src/main/java/com/elearning/probabilityandstatisticsanalyserandeducator/dataExtraction/ProbabilityValue.java
+++ b/app/src/main/java/com/elearning/probabilityandstatisticsanalyserandeducator/dataExtraction/ProbabilityValue.java
+package com.elearning.probabilityandstatisticsanalyserandeducator.dataExtraction;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts probability-like tokens ({@code 0}, {@code 0.<digits>} or {@code 1.0}) from the words
+ * of a question.
+ *
+ * <p>Matches accumulate in {@link #probabilityvalue} across calls on the same instance, in the
+ * order they are found. Use a fresh instance when only one question's values are wanted.
+ */
+class ProbabilityValue {
+    /**
+     * One or more probability literals, optionally chained by {@code ,}, whitespace, {@code ;} or
+     * {@code :}. A bare {@code 1} without {@code .0} is not accepted by this pattern.
+     *
+     * <p>Alternatives are tried left to right and the first one already accepts any unchained
+     * literal, so in practice only comma-chained literals are returned as a single token.
+     */
+    private static final Pattern PROBABILITY_PATTERN = Pattern.compile("\\b(0(\\.\\d+)?|1\\.0)+(,(0(\\.\\d+)?|1\\.0)+)*\\b|\\b(0(\\.\\d+)?|1\\.0)+(\\s(0(\\.\\d+)?|1\\.0)+)*\\b|\\b(0(\\.\\d+)?|1\\.0)+(;(0(\\.\\d+)?|1\\.0)+)*\\b|\\b(0(\\.\\d+)?|1\\.0)+(:(0(\\.\\d+)?|1\\.0)+)*\\b");
+
+    /** Every token extracted so far by this instance. */
+    List<String> probabilityvalue = new ArrayList<String>();
+    /** Fixed-size view of the most recently processed question. */
+    List<String> wordlist = new ArrayList<String>();
+    /** Trimmed form of the last word that was scanned. */
+    String word;
+    /** Never assigned by this class; kept for compatibility with existing readers. */
+    String data;
+
+    /**
+     * Scans each word of {@code question} and appends every probability token to
+     * {@link #probabilityvalue}.
+     *
+     * @param question the words of the question; {@code null} is treated as "no words" and
+     *     {@code null} elements are skipped
+     * @return the accumulated token list (the same list instance on every call)
+     */
+    public List<String> ExtractionProbabilityValue(String[] question) {
+        if (question == null) {
+            return probabilityvalue;
+        }
+
+        wordlist = Arrays.asList(question);
+        for (String s : question) {
+            if (s == null) {
+                continue;
+            }
+            word = s.trim();
+
+            Matcher matcher = PROBABILITY_PATTERN.matcher(word);
+            while (matcher.find()) {
+                // Append rather than insert by index so earlier results keep their position.
+                probabilityvalue.add(matcher.group().trim());
+            }
+        }
+        return probabilityvalue;
+    }
+}
--- a/app/src/main/python/probabilitydistribution.py
+++ b/app/src/main/python/probabilitydistribution.py
+from fuzzywuzzy import process
+def main(data,data2):
+	str2match=data
+	stroptions=["A probability distribution of the number of overtime hours worked in one week per employee is listed below.
+				Overtime hours,0,1,2,3,4,5,6
+				Probability,0.016,0.075,0.134,0.31,0.23,0.155,0.08
+				Find the standard deviation of the probability distribution. (Round to 1 decimal place as needed.)"]
+	ratio=process.extract(str2match,data2)
+	highest=process.extractOne(str2match,data2)
+	#print(ratio)
+	check_list=isinstance(data2,list)
+	#print(check_list)
+	#print(type(data2))
+	return ""+str(highest)
\ No newline at end of file
--- a/app/src/main/python/probabilitydistribution_keywords.py
+++ b/app/src/main/python/probabilitydistribution_keywords.py
+def word2vec(word):
+    from collections import Counter
+    from math import sqrt
+
+    # count the characters in word
+    cw = Counter(word)
+    # precomputes a set of the different characters
+    sw = set(cw)
+    # precomputes the "length" of the word vector
+    lw = sqrt(sum(c*c for c in cw.values()))
+
+    # return a tuple
+    return cw, sw, lw
+
+def cosdis(v1, v2):
+    # which characters are common to the two words?
+    common = v1[1].intersection(v2[1])
+    # by definition of cosine distance we have
+    return sum(v1[0][ch]*v2[0][ch] for ch in common)/v1[2]/v2[2]
+
+def matchkeyword(klist,sen):
+    matchedkeywordlist=[]
+    list_of_keywords = klist
+    Sentence = sen
+    threshold = 0.90
+    for key in list_of_keywords:
+        for word in Sentence.split():
+            try:
+            # print(key)
+               # print(word)
+                res = cosdis(word2vec(word), word2vec(key))
+                #print(res)
+                if res > threshold:
+                    #print("Found a word with cosine distance > 100 : {} with original word: {}".format(word, key))
+                    matchedkeywordlist.append(word)
+            except IndexError:
+                pass
+    return matchedkeywordlist
--- a/app/src/main/res/raw/propabilitydistribution.csv
+++ b/app/src/main/res/raw/propabilitydistribution.csv
+"The random variable X has a binomial distribution with parameters n=100 and p=0.8. Find the mean and the variance of X."
+"A manufacturing process produces components which are free from any faults with probability p. Find the probability that in a sample of size 50 from a large batch there are fewer than 4 faulty components when p = 0.95. Find the probability that in a sample of size 50 there are fewer than 10 faulty when p = 0.75."
+"Use the table to give a suitable approximation to the probability that 𝑋 ≥ 5 where X is binomial random variable with parameters p = 0.05 and n = 400."
+"Which of the following is not a requirement of a binomial distribution?
+a. Constant probability of success
+b. Only two possible Bernoulli outcomes
+c. Fixed number of trials
+d. Equally likely outcomes"
+"Determine whether the distribution is a discrete probability distribution.
+X 0 1 2 3 4
+P(x) 0.2 0.2 0.2 0.2 0.2"
+"Is the distribution a discrete probability distribution?
+A. No, because each probability is not between 0 and 1, inclusive.
+B. Yes, because the sum of the probabilities is equal to 1 and each probability is between 0 and 1, inclusive.
+C. Yes, because the sum of the probabilities is equal to 1.
+D. No, because the sum of the probabilities is not equal to 1"
+"An insurance company sells insurance policies. Each insurance policy leads to claims with an expected value of $100 and a standard deviation of $3,000. The number of policies sold is normally distributed with a mean of 1,000 and a standard deviation of 150.
+(a) What is the expectation of the total of all the claims from policies sold by this company?
+(b) What is the variance of the total of all the claims from this company's policies?"
+"A probability distribution of the number of overtime hours worked in one week per employee is listed below.
+Overtime hours,0,1,2,3,4,5,6
+Probability,0.016,0.075,0.134,0.31,0.23,0.155,0.08
+Find the standard deviation of the probability distribution. (Round to 1 decimal place as needed.)"
+"A probability distribution of the number of overtime hours worked in one week per employee is listed below.
+Overtime hours,0,1,2,3,4,5,6
+Probability,0.016,0.075,0.134,0.31,0.23,0.155,0.08
+Find the variance of the probability distribution. (Round to 1 decimal place as needed.)"
+"A probability distribution gives us:
+A. A distribution of probabilities
+B. The probability of every possible event in a population
+C. The probability of some of the events in the population
+D. The probability of obtaining certain probabilities"