import csv
import os
import pprint
import re
import cPickle as pickle
import random

from numpy import hstack, vstack, linalg, dot, array, ones, zeros

csv.register_dialect('prec', delimiter=';', quoting=csv.QUOTE_ALL)
 
#############################################################################
#
# Database -> Features: classes for converting raw db types to feature vectors
#
#############################################################################

def get_db():
    """Open and return a connection to the local 'prosper' MySQL database."""
    # Imported lazily so the rest of the module works without MySQLdb.
    import MySQLdb
    connection = MySQLdb.connect(host="localhost",
                                 user="prosper",
                                 passwd="mas622j",
                                 db="prosper")
    return connection

class Field:
    """Base class for a database field to be considered as a feature, usable
    for building database queries and converting returned results into
    numerical or binary values.
    """
    def __init__(self, field, constraints = None):
        # field is a fully-qualified "Table.Column" name; the table part is
        # split off so queries can build their FROM clause.
        self.fields = [field]
        self.tables = [field.split(".")[0]]
        self.constraints = constraints or []
        self.headings = self.fields

    def parse(self, column_vector):
        """Convert a column of raw db values into an Nx1 float feature column."""
        col = zeros([len(column_vector), 1])
        for i, v in enumerate(column_vector):
            col[i] = float(v)
        return col

    def __hash__(self):
        # Bug fix: self.fields is a list and therefore unhashable; hash an
        # immutable tuple copy instead.
        return hash(tuple(self.fields))

    def __cmp__(self, other):
        # Py2-style comparison: equal only when fields and constraints match;
        # otherwise arbitrarily "less than".
        if self.fields == other.fields and self.constraints == other.constraints:
            return 0
        return -1

    def __str__(self):
        return "<%s>" % str(self.fields)

class ChoiceField(Field):
    """A field whose value is one of a fixed set of choices.  Each choice
    becomes one binary column in the output feature matrix.
    """
    def __init__(self, field, choices, constraints = None):
        Field.__init__(self, field, constraints)
        self.headings = ["%s_%s" % (field, str(choice)) for choice in choices]
        self.choices = choices
        # Map each choice to its column index for fast lookup in parse().
        self.choicemap = {}
        for i, choice in enumerate(choices):
            self.choicemap[choice] = i

    def parse(self, column_vector):
        """Return an N x len(choices) binary matrix; NULL rows stay all-zero."""
        features = zeros([len(column_vector), len(self.choices)])
        for i, v in enumerate(column_vector):
            # Identity test for the NULL sentinel, not equality.
            if v is None:
                continue
            features[i][self.choicemap[v]] = 1
        return features

class StateField(ChoiceField):
    """A field representing a two-letter state code.  Converts this into a
    sparse binary feature vector with one column per known code (53 codes,
    including DC and the military AA/AE/AP designations).
    """
    # Order matters: column indices are assigned by position in this list.
    states = [
        "MO", "FL", "GA", "WA", "VA", "IA", "OH", "NY", "CA", "NJ", "NM",
        "CO", "NC", "MA", "WI", "IL", "NE", "AZ", "TX", "OK", "TN", "IN",
        "HI", "AR", "MI", "SC", "KS", "MN", "OR", "MT", "ND", "AL", "LA",
        "NH", "AA", "MD", "AK", "ID", "CT", "KY", "PA", "UT", "ME", "AP",
        "MS", "WV", "DC", "DE", "WY", "AE", "NV", "RI", "VT",
    ]

    def __init__(self, field, constraints = None):
        ChoiceField.__init__(self, field, self.states, constraints)


class MemberRoleField(ChoiceField):
    """A field representing Member Roles, interpreted as a text field with a
    comma separated list containing any of the 4 different roles below.
    Converts this to a 4-element wide binary feature vector.
    """
    roles = ["Borrower", "Group Leader", "Lender", "INSTITUTIONAL_LENDER"]

    def __init__(self, field, constraints = None):
        ChoiceField.__init__(self, field, self.roles, constraints)

    def parse(self, column_vector):
        """Differs from vanilla choice field in splitting by commas; a single
        row may therefore set several role columns at once.
        """
        features = zeros([len(column_vector), len(self.roles)])
        for i, role_str in enumerate(column_vector):
            if role_str is None:
                continue
            for role in role_str.split(","):
                # `in` instead of the deprecated dict.has_key; unknown role
                # strings are silently ignored.
                if role in self.choicemap:
                    features[i][self.choicemap[role]] = 1
        return features

class WordFreqField(Field):
    """A field representing a count of the number times a particular word
    appears in the field.
    """
    def __init__(self, field, word_list, constraints = None):
        Field.__init__(self, field, constraints)
        self.headings = ["%s_%s" % (field, a) for a in word_list]
        self.word_list = word_list

    def parse(self, column_vector):
        """Return an N x len(word_list) matrix of substring occurrence counts.
        Empty/NULL texts leave their row all-zero.
        """
        counts = zeros([len(column_vector), len(self.word_list)])
        for i, text in enumerate(column_vector):
            if text:
                for j, word in enumerate(self.word_list):
                    # str.count is the idiomatic (and equivalent) form of the
                    # original len(text.split(word)) - 1.
                    counts[i, j] = text.count(word)
        return counts

class NullOrNoField(Field):
    """Is this field null? 0.  No? 1."""
    def __init__(self, field, constraints = None):
        Field.__init__(self, field, constraints)
        self.headings = [field + "_null?"]

    def parse(self, column_vector):
        """Return an Nx1 binary column: 1 where the value is non-NULL."""
        col = zeros([len(column_vector), 1])
        for i, value in enumerate(column_vector):
            # Bug fix: the original tested `column_vector != None`, which is
            # always true and marked every row as non-null.  Test the
            # individual element instead.
            if value is not None:
                col[i] = 1
        return col

class NullOrNumberField(Field):
    """Returns 2 columns; one is binary for field is null, the other is a
    numerical value (or zero if the field is null).
    """
    def __init__(self, field, constraints = None):
        Field.__init__(self, field, constraints)
        self.headings = [field + "_null?", field]

    def parse(self, column_vector):
        """Column 0: 1 iff the value is NULL.  Column 1: the numeric value
        (left at 0 when NULL)."""
        cols = zeros([len(column_vector), 2])
        for i, v in enumerate(column_vector):
            # Identity test for the NULL sentinel, not equality.
            if v is None:
                cols[i, 0] = 1
            else:
                cols[i, 1] = float(v)
        return cols

class CreditGradeField(Field):
    """Maps a letter credit grade to a representative numeric score column."""
    # Representative score per Prosper letter grade; "NC" (no credit) is 0.
    grades = {"AA": 760, "A": 720, "B": 680, "C": 640, "D": 600, "E": 560,
              "HR": 520, "NC": 0}

    def __init__(self, field, constraints = None):
        Field.__init__(self, field, constraints)

    def parse(self, column_vector):
        # NOTE(review): raises KeyError on NULL or unknown grades -- assumes
        # this column is always populated with a known grade; confirm upstream.
        cols = zeros([len(column_vector), 1])
        for row, grade in enumerate(column_vector):
            cols[row] = self.grades[grade]
        return cols

class WordPresentField(Field):
    """Binary features marking whether each phrase in words_dict appears in
    the HTML-stripped, whitespace-normalized text of the field.
    """
    def __init__(self, field, words_dict, constraints = None):
        Field.__init__(self, field, constraints)
        # Flatten the {group: [phrases]} dict in sorted-key order so the
        # column ordering is deterministic (sorted() replaces the Py2-only
        # items.sort() on a dict-items list).
        self.words = []
        for k, v in sorted(words_dict.items()):
            self.words += v
        self.headings = ["%s_%s" % (field, word) for word in self.words]
        # Raw strings for regex patterns (identical patterns as before).
        self.splitter = re.compile(r'\W+')
        # Strips HTML tags and entities like &amp; before matching.
        self.html_filter = re.compile(r"((<.*?>)|(\&[a-z]+;))")

    def parse(self, column_vector):
        """Return an N x len(words) binary matrix of phrase-containment flags."""
        cols = zeros([len(column_vector), len(self.words)])
        for i, v in enumerate(column_vector):
            if v is not None:
                v = self.html_filter.sub(' ', v)
                filtered = " ".join(self.splitter.split(v))
                for col, word in enumerate(self.words):
                    # Substring (not whole-word) containment test.
                    if word in filtered:
                        cols[i, col] = 1
        return cols

class IndexedChoiceField(Field):
    """Encodes a choice field as a single numeric column holding the index
    of the value within the given choices list."""
    def __init__(self, field, choices, constraints = None):
        Field.__init__(self, field, constraints)
        self.choices = choices
        # (sic) "indeces": maps each choice value to its list position.
        self.indeces = {}
        for position, choice in enumerate(choices):
            self.indeces[choice] = position

    def parse(self, column_vector):
        # NOTE(review): raises KeyError for values outside choices (incl. NULL).
        col = zeros([len(column_vector), 1])
        for row, value in enumerate(column_vector):
            col[row] = self.indeces[value]
        return col

class CountRegexSubsField(Field):
    """A single numeric column counting non-overlapping matches of a regex
    in the field's text (NULL values count as 0).
    """
    def __init__(self, field, regex, heading = "regex", constraints = None):
        Field.__init__(self, field, constraints)
        self.regex = re.compile(regex)
        self.headings = ["%s_%s" % (field, heading)]

    def parse(self, column_vector):
        col = zeros([len(column_vector), 1])
        for i, v in enumerate(column_vector):
            if v is not None:
                # subn returns (new_string, number_of_subs); only the match
                # count is wanted here.
                col[i] = self.regex.subn('', v)[1]
        return col


#
# Constraints
#

class JoinField(Field):
    """A field representing a constraint joining two fields together.
    Contributes to the defined tables and constraints, but does not add any
    selection.
    """
    def __init__(self, field1, field2):
        # No selected columns or headings; only the two tables and the join
        # condition are contributed to the query.
        self.fields = []
        self.headings = []
        table1 = field1.split(".")[0]
        table2 = field2.split(".")[0]
        self.tables = [table1, table2]
        self.constraints = ['%s=%s' % (field1, field2)]

class ConstraintField(Field):
    """A field representing a generic constraint, which should be valid SQL.
    Adds tables if they are given in an argument.  Adds no fields to the
    selection.
    """
    def __init__(self, constraint, tables = None):
        # Pure WHERE-clause contribution: nothing selected, no headings.
        self.fields = []
        self.headings = []
        self.constraints = [constraint]
        self.tables = tables if tables else []

#
# Feature sets
#

class FeatureSet:
    def __init__(self, name, fields = None, constraints = None, limit = None):
        self.name = name
        self.fields = fields or []
        self.constraints = constraints or []
        self.headings = []
        self.limit = limit
        if fields:
            for field in self.fields:
                self.headings += field.headings
            self._query_db()

    def _build_query(self):
        """Given a list of fields, constructs an SQL query.
        """
        fields = self.fields + self.constraints
        if not fields:
            return None
        base_query = """SELECT %s FROM %s WHERE %s"""
        tables = set()
        selection = []
        conditions = set()
        for field in fields:
            tables = tables.union(field.tables)
            selection += field.fields
            conditions = conditions.union(field.constraints)

        query = base_query % (",".join(selection), 
                ",".join(tables), 
                (" AND ".join(conditions)) or "TRUE")
        return query

    def _query_db(self):
        # Fetch database values
        db = get_db()
        csr = db.cursor()
        query = self._build_query() 
        if query:
            # Memory bug.  MySQL can't fetch all at once, so split the queries.
            print query
            # () evaluates as +infinity
            limit = min(self.limit or (), 30000)
            offset = 0
            rows = []
            while True:
                limited = "%s LIMIT %i OFFSET %i" % (query, limit, offset)
                print offset
                csr.execute(limited)
                fetch = csr.fetchall()
                rows += fetch
                offset += limit
                if len(fetch) == 0 or (self.limit and self.limit <= offset):
                    break
            rows = array(rows)
            csr.close()

            # Convert database values to a feature matrix
            cols = []
            for i,field in enumerate(self.fields):
                print i
                cols.append(field.parse(rows[:, i]))
            self.features = hstack(cols)
        db.close()

    def subsample(self, samples):
        """Return a subsampling of self.features with given number of random
        samples.
        """
        return array(random.sample(self.features, samples))
    @classmethod
    def unserialize(cls, filename):
        reader = csv.reader(open(filename), dialect='excel-tab')
        rows = []
        for row in reader:
            rows.append(row)
        name = rows.pop(0)[0]
        fs = FeatureSet(name)
        fs.headings = rows.pop(0)
        fs.features = []
        for row in rows:
            fs.features.append([float(f) for f in row])
        fs.features = array(fs.features)
        return fs

    def serialize(self, filename):
        self._build_dir_tree(filename)
        writer = csv.writer(open(filename, 'wb'), dialect='excel-tab')
        writer.writerow([self.name])
        writer.writerow(self.headings)
        writer.writerows(self.features)

    def write_csv(self, filename, columns = None, samples = None):
        self._build_dir_tree(filename)
        headings = array(self.headings)
        if not samples:
            features = self.features
        else:
            features = self.subsample(samples)
        if columns:
            features = features[:, columns]
            headings = headings[:, columns]
        writer = csv.writer(file(filename, 'wb'), dialect='excel-tab')
        writer.writerow(headings)
        writer.writerows(features)

    def _build_dir_tree(self, filename):
        dirs = filename.split('/')[:-1]
        path = ""
        for dir in dirs:
            path = os.path.join(path, dir)
            if not os.path.exists(path):
                os.mkdir(path)

##############################################################################
#
# Feature search algorithms: Classes for determing the "best" subset of 
# features.
#
##############################################################################

class ScoreTable(dict):
    """Table of scores for featuresets, used in feature search algorithms.
    Extends dict to include an additional "best" dict, which stores the highest
    scoring value yet entered for a featureset, indexed by the length
    of the feature set.
    """
    def __init__(self):
        dict.__init__(self)
        # best maps feature-count -> [score, frozenset(features)]
        self.best = {}

    def store(self, features, score):
        """Store the key,value pair of features (an iterable) and score (a
        float).  Also updates the "best" scores for the number of features
        if applicable.
        """
        best_score = self.best.get(len(features), [None, None])[0]
        # Explicit None check: comparing a float against None relied on
        # Python 2's arbitrary mixed-type ordering (same result, but it
        # breaks outright under Python 3).
        if best_score is None or score > best_score:
            self.best[len(features)] = [score, frozenset(features)]
        self[frozenset(features)] = score

    def best_for(self, k):
        """Return a mutable copy of the best feature set of size k."""
        return set(self.best[k][1])

    def best_score_for(self, k):
        """Return the best score achieved with k features."""
        return self.best[k][0]

class FeatureSearch:
    """Base class for feature search algorithms.  Evaluates featuresets using a
    linear discriminant test.
    """
    def __init__(self, class1, class2):
        ac1 = self.augment(class1)
        ac2 = self.augment(class2)
        self.Y = vstack([ac1, ac2 * -1])

    def augment(self, features):
        """Put a column of ones to the left of the given matrix."""
        ones_column = ones((len(features), 1))
        return hstack([ones_column, features])

    def evaluate(self, feature_set):
        """Perform linear discriminant test on this feature set with 
        all training data.  Return the percent success.
        """
        # Split off the columns representing the classification and the
        # features we are after.
        cols = [0] + list(feature_set)
        cols.sort()
        subY = self.Y[:, cols]
        # Find the weight vector for classification.
        a = dot(linalg.pinv(subY), ones([len(subY), 1]))
        # Find the error for this classifier.
        successes = 0
        for y in subY:
            if y[0] == -1:
                success = dot(y * -1, a) < 0
            else:
                success = dot(y, a) >= 0
            if success:
                successes += 1
        return float(successes) / len(subY)

    def search(self, max_features = None):
        """ Perform a naive greedy forward search for features."""
        # keep scores in a list sorted nicely for retrieval:
        if not max_features:
            max_features = self.Y.shape[1]

        scores = ScoreTable()
        # ignore column 0 (class column)
        for k in range(1, max_features):
            print k
            for f in range(1, self.Y.shape[1]):
                test_set = scores.best_for(k - 1)
                if test_set:
                    test_set.add(f)
                else:
                    test_set = set([f])
                score = self.evaluate(test_set)
                scores.store(test_set, score)
        return scores.best

class BruteFeatureSearch(FeatureSearch):
    """Search all possible subsets of features.  As this constitutes 2**n
    possible subsets, this is only useful for relatively small featuresets.
    """
    @classmethod
    def all_possible_subsets(cls, items):
        """whoa.  Returns all possible non-empty subsets.

        Membership is driven by the bits of the `switches` counter: bit
        `pos` selects items[pos].
        """
        return [[x for (pos, x) in enumerate(items) if (2 ** pos) & switches]
                for switches in range(1, 2 ** len(items))]

    def search(self):
        # Keep the table on self for consistency with the other searches
        # (FeatureFinderFileWriter reads search.scores after searching).
        self.scores = scores = ScoreTable()
        # ignore column 0 (class column)
        apc = self.all_possible_subsets(range(1, self.Y.shape[1]))
        for features in apc:
            score = self.evaluate(features)
            scores.store(features, score)
        return scores.best

#class BackwardsFloatingSearch(FeatureSearch):
#    """Perform backwards floating search.  A semi-greedy backwards search
#    with greedy backtracking (forwardtracking?).
#    """
# NOTE: this class is commented out, but FeatureFinderFileWriter.get_best_features
# still references BackwardsFloatingSearch; selecting method="backwards_floating"
# will raise a NameError until this is restored or that branch is removed.
#    def search(self):
#        scores = ScoreTable()
#        # ignore column 0 (class column)
#        features = frozenset(range(1, self.Y.shape[1]))
#        scores.store(features, self.evaluate(features))
#        max_k = len(features)
#        k = max_k
#        while k > 0:
#            # Determine the best subset: try removing each feature.
#            best_superset = scores.best_for(k)
#            for f in best_superset:
#                test_set = set(best_superset)
#                test_set.remove(f)
#                if frozenset(test_set) not in scores:
#                    scores.store(test_set, self.evaluate(test_set))
#            # Backtracking: try to find a better superset
#            # Start with the best set found from the previous loop.
#            best_subset = scores.best_for(k - 1)
#            available_features = features.difference(best_subset)
#            for f in available_features:
#                test_set = set(best_subset)
#                test_set.add(f)
#                if frozenset(test_set) not in scores:
#                    best_k = scores.best_for(k)
#                    scores.store(test_set, self.evaluate(test_set))
#                    # continue backtracking up k if we've improved
#                    new_best = scores.best_for(k)
#                    if new_best != best_k:
#                        best_subset = scores.best_for(k)
#                        k += 1
#            k -= 1
#            print k
#        return scores.best

class ForwardsFloatingSearch(FeatureSearch):
    """Perform forwards floating search.  A semi-greedy forwards search
    with greedy backtracking."""

    def __init__(self, c1, c2, max_num_features = None, pickle_file = None):
        FeatureSearch.__init__(self, c1, c2)
        self.max_num_features = max_num_features
        if pickle_file:
            self.pickle_file = pickle_file
            if os.path.exists(pickle_file):
                try:
                    f = open(pickle_file)
                    self.scores = pickle.load(f)
                    f.close()
                except:
                    print "Exception caught while attempting to load:", pickle_file, "Ignoring."

    def search(self):
        self.scores = ScoreTable()
        # ignore column 0 (class column)
        features = frozenset(range(1, self.Y.shape[1]))
        if self.max_num_features:
            max_k = self.max_num_features
        else:
            max_k = len(features)
        # Start with a score of 0 for empty set.
        self.scores.store(set(), 0)
        k = 0
        while k < max_k:
            score, best_k = self.scores.best[k]
            print k, score
            available_features = features.difference(best_k)
            for f in available_features:
                test_set = set(best_k)
                test_set.add(f)
                if frozenset(test_set) not in self.scores:
                    self.scores.store(test_set, self.evaluate(test_set))
            # Backtracking: try to find a better subset
            k = self._backtrack(self.scores.best_for(k + 1))

            # Serialize scores for interruptability
            if k % 10 == 0:
                f = open(self.pickle_file, 'wb')
                pickle.dump(self.scores, f)
                f.close()

        return self.scores.best

    def _backtrack(self, super_set, depth = 1):
        k = len(super_set)
        min_k = k
        for f in super_set:
            test_set = set(super_set)
            test_set.remove(f)
            if frozenset(test_set) not in self.scores:
                best_subset = self.scores.best_for(k - 1)
                self.scores.store(test_set, self.evaluate(test_set))
                new_best = self.scores.best_for(k - 1)
                if best_subset != new_best:
                    print "b" * depth, k - 1, self.scores.best_score_for(k - 1)
                    min_k = min(min_k, self._backtrack(test_set, depth + 1))
        return min_k



#############################################################################
#
#  Main.  Get the features and run a search.
#
#############################################################################

class FeatureFinderFileWriter:
    def __init__(self, name1, name2, root):
        self.root = root
        self.fs1 = FeatureSet.unserialize("%s/%s.csv" % (self.root, name1))
        self.fs2 = FeatureSet.unserialize("%s/%s.csv" % (self.root, name2))

    def get_best_features(self, method = "forwards_floating", samples = None, max_num_features = 30):
        best_name = "%s/%s-%s-%isamps-best.pickle" % (self.root, self.fs1.name, self.fs2.name, samples)
        scores_name = "%s/%s-%s-%isamps-all.pickle" % (self.root, self.fs1.name, self.fs2.name, samples)
        working_name = "%s/%s-%s-%isamps-working.pickle" % (self.root, self.fs1.name, self.fs2.name, samples)
        if method == None:
            self.best = pickle.load(open(best_name))
            self.scores = pickle.load(open(scores_name))
            return
        if samples == None:
            c1 = self.fs1.features
            c2 = self.fs2.features
        else:
            c1 = self.fs1.subsample(samples)
            c2 = self.fs2.subsample(samples)

        if method == "brute":
            search = BruteFeatureSearch(c1, c2)
            print "Brute"
        elif method == "greedy":
            search = FeatureSearch(c1, c2)
            print "Greedy"
            pprint.pprint(fs.search())
        elif method == "backwards_floating":
            search = BackwardsFloatingSearch(c1, c2)
            print "Backwards Floating"
        elif method == "forwards_floating":
            search = ForwardsFloatingSearch(c1, c2, max_num_features, 
                    pickle_file = working_name)
            print "Forwards Floating"
        self.best = search.search()
        self.scores = search.scores
        pprint.pprint(self.best)
        pickle.dump(self.best, open(best_name, 'w'))
        pickle.dump(search.scores, open(scores_name, 'w'))

    def write_sampled_files(self, nums_of_features = None, nums_of_samples = None):
        if nums_of_samples == None:
            nums_of_samples = (100, 1000, 10000, None)
        if nums_of_features == None:
            nums_of_features = (3, 5, 10, 15, 20, 25, 30, 35, None)
        for fs in (self.fs1, self.fs2):
            for samples in nums_of_samples:
                if samples == None:
                    samples = len(fs.features)
                for num_features in nums_of_features:
                    if num_features == None:
                        num_features = len(fs.headings)
                    if num_features == len(fs.headings):
                        columns = range(len(fs.headings))
                        col_list = "_all"
                    else:
                        columns = [c - 1 for c in self.best[num_features][1]]
                        columns.sort()
                        col_list = "-".join([str(i) for i in columns])
                    filename = "%s/%s/samps%i_cols%s.dat" % (self.root, fs.name, samples, col_list)
                    print filename
                    fs.write_csv(filename, columns = columns, samples = samples)

    def write_heading_key(self):
        for fs in (self.fs1, self.fs2):
            filename = "%s/%s/headings.txt" % (self.root, fs.name)
            file = open(filename, 'w')
            for i, h in enumerate(fs.headings):
                file.write("%i\t%s\n" % (i + 1, h))
            file.close()

    def write_best(self):
        name = "%s/%s-%s-best_features.txt" % (self.root, self.fs1.name, self.fs2.name)
        items = self.best.items()
        items.sort()
        out = ""
        for num, (utility, features) in self.best.iteritems():
            out += "%i: %f, %s\n" % (num, utility, str(sorted(list(features))))

        f = open(name, 'w')
        f.write(out)
        f.close()


    @classmethod
    def build_feature_sets_from_db(cls, root, limit = None):
        fields, constraints = all_features_and_constraints()
        loans_constraint = JoinField("Listings.ProsperKey", "Loans.ListingKey")
        listings_constraint = ConstraintField(
                "Listings.ProsperKey NOT IN (SELECT ListingKey FROM Loans)", 
                tables=["Listings"])
        paid_constraint = ConstraintField("Loans.Status = 'Paid'")
        defaulted_constraint = ConstraintField("(Loans.Status LIKE '%Defaulted%' OR Loans.Status = 'Charge-off' OR Loans.Status = 'Repurchased')")

        def build_file(name, class_constraints):
            print "#"
            print "# %s" % name
            print "#"
            fs = FeatureSet(name, fields, constraints + class_constraints, limit = limit)
            fs.serialize("%s/%s.csv" % (root, name))

        build_file("loans", [loans_constraint])
        build_file("listings", [listings_constraint])
        build_file("paid", [loans_constraint, paid_constraint])
        build_file("defaulted", [loans_constraint, defaulted_constraint])



def all_features_and_constraints():
    """Return (fields, constraints): the full list of Field objects whose
    parsed values form the feature matrix, plus the non-selecting JoinFields
    that tie the Members, Listings, and CreditProfiles tables together.
    """
    # the database values (selection) to convert to features.
    fields = [
            # Credit Profiles
            NullOrNumberField("CreditProfiles.AmountDelinquent"),
            NullOrNumberField("CreditProfiles.BankcardUtilization"),
            NullOrNumberField("CreditProfiles.CurrentCreditLines"),
            NullOrNumberField("CreditProfiles.CurrentDelinquencies"),
            NullOrNumberField("CreditProfiles.DelinquenciesLast7Years"),
            Field("CreditProfiles.Income"),
            Field("CreditProfiles.LengthStatusMonths"),
            NullOrNumberField("CreditProfiles.OpenCreditLines"),
            NullOrNumberField("CreditProfiles.RevolvingCreditBalance"),

            # Listings
            Field("Listings.AmountRequested"),
            Field("Listings.BorrowerMaximumRate"),
            Field("Listings.Category"),
            CreditGradeField("Listings.CreditGrade"),
            NullOrNumberField("Listings.DebtToIncomeRatio"),
            # Phrase-presence features grouped by n-gram length (3/2/1 words).
            WordPresentField("Listings.Description", {
                3: ["cards and other", "monthly expenses housing", "clothing household expenses",
                    "and other loans", "phone cable internet", "credit cards and",
                    "monthly net income", "a good candidate", "good candidate for",
                    "my financial situation"], 
                2: ["other expenses", "expenses housing", "clothing household", "car expenses",
                    "household expenses", "other loans", "phone cable", "and other",
                    "food entertainment", "cards and"],
                1: ["prosper", "as", "clothing", "household", "housing", "card", "entertainment",
                    "is", "with", "an"],
            }),
            IndexedChoiceField("Listings.FundingOption", ["Close When Funded", "Open For Duration"]),
            NullOrNoField("Listings.Images"),
            NullOrNumberField("Listings.IsBorrowerHomeowner"),
            WordPresentField("Listings.Title", {
                3: ["high interest credit", "off high interest", "credit card debt", 
                    "interest credit card", "off credit cards"],
                2: ["credit card", "high interest", "in prosper", "interest credit", "pay off"],
                1: ["bills", "prosper", "card", "interest", "credit"],
            }),
            # Counts of non-alphabetic and capital characters in the title.
            CountRegexSubsField("Listings.Title", "[^A-Za-z ]", "nonalpha"),
            CountRegexSubsField("Listings.Title", "[A-Z]", "caps"),
            
            # Members
            NullOrNoField("Members.Endorsements"),
            WordPresentField("Members.Endorsements", {
                3: ["i have known", "he is a", "this is a", "i will be", "i ve known"],
                2: ["i have", "is a", "this loan", "will be", "he is"],
                1: ["i", "and", "a", "to", "the"],
            }),
            # Number of comma-separated friend keys.
            CountRegexSubsField("Members.FriendMemberKeys", "([^,]+)", "numfriends"),
            NullOrNoField("Members.GroupKey"),
            NullOrNoField("Members.Images"),
            MemberRoleField("Members.Roles"),
    ]
    # non-selecting fields that constrain the selection
    constraints = [JoinField("Members.ProsperKey", "Listings.MemberKey"),
            JoinField("CreditProfiles.ListingKey", "Listings.ProsperKey")]
    return fields, constraints


if __name__ == "__main__":
    # Build the four featuresets from the database.
    # NOTE(review): these are written under "features30", but the searches
    # below read from "features" and "features_b" -- confirm those roots hold
    # pre-built data or whether they should match.
    FeatureFinderFileWriter.build_feature_sets_from_db("features30")
    # Search 1: paid vs defaulted loans (1000-row subsample).
    root = "features"
    f2 = FeatureFinderFileWriter("paid", "defaulted", root)
    f2.get_best_features(method = "forwards_floating", samples = 1000, max_num_features = None)
    f2.write_sampled_files(nums_of_samples = [100, 1000, None],
            nums_of_features = [3, 5, 10, 15, 20, 25, 30, None])
    f2.write_heading_key()
    f2.write_best()

    # Search 2: funded loans vs unfunded listings (10000-row subsample).
    root = "features_b"
    f1 = FeatureFinderFileWriter("loans", "listings", root)
    f1.get_best_features(method = "forwards_floating", samples = 10000, max_num_features = None)
    f1.write_sampled_files(nums_of_samples = [1000, 10000, None], 
            nums_of_features = [3, 5, 10, 15, 20, 25, 30, None])
    f1.write_heading_key()
    f1.write_best()
