csv_types.Num
"""Summary of a numeric column (class ``Num``)."""
from src.csv_types.Obj import Obj
import random
import re
from src.utils import per


class Num(Obj):
    """Running summary of one numeric column.

    Tracks how many values were added, the lowest and highest value seen,
    and a size-capped random sample of the values (``_has``) that the
    percentile-based queries ``div`` and ``mid`` read.
    """

    def __init__(self, column_position=0, column_name=""):
        """Create an empty summary.

        :param column_position: position of the column being summarised
        :param column_name: name of the column; when it ends in ``-`` the
            weight ``w`` is -1, otherwise 1
        """
        super().__init__("Num")
        self.n = 0  # items seen
        self.at = column_position  # column position
        self.name = column_name  # column name
        self._has = []  # kept data
        self.lo = float("inf")  # lowest seen
        self.hi = float("-inf")  # highest seen
        # Must be a real bool; a trailing comma here would store (True,).
        self.isSorted = True  # no updates since last sort of data
        # NOTE(review): w presumably marks minimise (-1) vs maximise (1)
        # goals -- confirm against the code that reads it.
        self.w = -1 if re.search(r"-$", column_name or "") else 1

    def nums(self):
        """
        :return: sorted self._has list
        """
        if not self.isSorted:
            # Sort lazily, only after add() has changed the sample.
            self._has.sort()
            self.isSorted = True
        return self._has

    def add(self, v):
        """Fold one value into the summary.

        :param v: a number, or the string ``"?"`` which is ignored
        """
        # Settings dict; its "nums" entry caps how many values are kept.
        from src.constants import the

        if v == "?":
            return
        self.n += 1
        self.lo = min(self.lo, v)
        self.hi = max(self.hi, v)
        capacity = the["nums"]
        # The value is stored as given (not int(v)), so fractional data is
        # not truncated and the sample agrees with lo/hi.
        if len(self._has) < capacity:
            self._has.append(v)
        elif random.random() < capacity / self.n:
            # Sample is full: overwrite a randomly chosen slot.
            self._has[random.randint(0, len(self._has) - 1)] = v
        else:
            return
        self.isSorted = False

    def div(self):
        """
        :return: (per(a, 0.9) - per(a, 0.1)) / 2.58 over the sorted sample
        """
        # NOTE(review): per(a, p) presumably returns the p-th percentile
        # item of a sorted list -- confirm in src.utils.
        a = self.nums()
        return (per(a, 0.9) - per(a, 0.1)) / 2.58

    def mid(self):
        """
        :return: per(sorted sample, 0.5) -- presumably the median
        """
        return per(self.nums(), 0.5)
class
Num(src.csv_types.Obj.Obj):
class Num(Obj):
    """Running summary of one numeric column.

    Tracks how many values were added, the lowest and highest value seen,
    and a size-capped random sample of the values (``_has``) that the
    percentile-based queries ``div`` and ``mid`` read.
    """

    def __init__(self, column_position=0, column_name=""):
        """Create an empty summary.

        :param column_position: position of the column being summarised
        :param column_name: name of the column; when it ends in ``-`` the
            weight ``w`` is -1, otherwise 1
        """
        super().__init__("Num")
        self.n = 0  # items seen
        self.at = column_position  # column position
        self.name = column_name  # column name
        self._has = []  # kept data
        self.lo = float("inf")  # lowest seen
        self.hi = float("-inf")  # highest seen
        # Must be a real bool; a trailing comma here would store (True,).
        self.isSorted = True  # no updates since last sort of data
        # NOTE(review): w presumably marks minimise (-1) vs maximise (1)
        # goals -- confirm against the code that reads it.
        self.w = -1 if re.search(r"-$", column_name or "") else 1

    def nums(self):
        """
        :return: sorted self._has list
        """
        if not self.isSorted:
            # Sort lazily, only after add() has changed the sample.
            self._has.sort()
            self.isSorted = True
        return self._has

    def add(self, v):
        """Fold one value into the summary.

        :param v: a number, or the string ``"?"`` which is ignored
        """
        # Settings dict; its "nums" entry caps how many values are kept.
        from src.constants import the

        if v == "?":
            return
        self.n += 1
        self.lo = min(self.lo, v)
        self.hi = max(self.hi, v)
        capacity = the["nums"]
        # The value is stored as given (not int(v)), so fractional data is
        # not truncated and the sample agrees with lo/hi.
        if len(self._has) < capacity:
            self._has.append(v)
        elif random.random() < capacity / self.n:
            # Sample is full: overwrite a randomly chosen slot.
            self._has[random.randint(0, len(self._has) - 1)] = v
        else:
            return
        self.isSorted = False

    def div(self):
        """
        :return: (per(a, 0.9) - per(a, 0.1)) / 2.58 over the sorted sample
        """
        # NOTE(review): per(a, p) presumably returns the p-th percentile
        # item of a sorted list -- confirm in src.utils.
        a = self.nums()
        return (per(a, 0.9) - per(a, 0.1)) / 2.58

    def mid(self):
        """
        :return: per(sorted sample, 0.5) -- presumably the median
        """
        return per(self.nums(), 0.5)
Num(column_position=0, column_name='')
def __init__(self, column_position=0, column_name=""):
    """Create an empty numeric-column summary.

    :param column_position: position of the column being summarised
    :param column_name: name of the column; when it ends in ``-`` the
        weight ``w`` is -1, otherwise 1
    """
    super().__init__("Num")
    self.n = 0  # items seen
    self.at = column_position  # column position
    self.name = column_name  # column name
    self._has = []  # kept data
    self.lo = float("inf")  # lowest seen
    self.hi = float("-inf")  # highest seen
    # Must be a real bool; a trailing comma here would store (True,).
    self.isSorted = True  # no updates since last sort of data
    # `or ""` keeps re.search from failing when column_name is None.
    self.w = -1 if re.search(r"-$", column_name or "") else 1
def
nums(self):
def nums(self):
    """
    Sort the kept values in place if they changed since the last sort.

    :return: the sorted self._has list (the stored list itself, not a copy)
    """
    if self.isSorted:
        # Nothing was added since the last sort; reuse the list as is.
        return self._has
    kept = self._has
    kept.sort()
    self.isSorted = True
    return kept
Returns
sorted self._has list
def
add(self, v):
def add(self, v):
    """Fold one value into the summary.

    Updates the count and the lowest/highest value seen, then keeps the
    value in the sample ``_has``: appended while the sample holds fewer
    than ``the["nums"]`` items, otherwise written over a randomly chosen
    slot with probability ``the["nums"] / n``.

    :param v: a number, or the string ``"?"`` which is ignored
    """
    # Settings dict; its "nums" entry caps how many values are kept.
    from src.constants import the

    if v == "?":
        return
    self.n += 1
    self.lo = min(self.lo, v)
    self.hi = max(self.hi, v)
    capacity = the["nums"]
    # The value is stored as given (not int(v)), so fractional data is not
    # truncated and the sample agrees with lo/hi.
    if len(self._has) < capacity:
        self._has.append(v)
    elif random.random() < capacity / self.n:
        # Sample is full: overwrite a randomly chosen slot.
        self._has[random.randint(0, len(self._has) - 1)] = v
    else:
        return
    # The sample changed, so the next nums() call must re-sort.
    self.isSorted = False