"""deepmatcher tutorial: build MatchingModel variants on the iTunes-Amazon sample data.

Downloads the sample CSVs, preprocesses them, and instantiates a series of
matching models with different attribute summarizers/comparators.
"""
import logging
import os
import urllib.request

import torch

import deepmatcher as dm

# Surface INFO-level progress messages from deepmatcher's training core.
logging.getLogger('deepmatcher.core').setLevel(logging.INFO)

# Download sample data.
# NOTE: originally IPython `!mkdir -p` / `!wget -qnc` shell magics, which are
# syntax errors outside a notebook — rewritten as plain Python. The existence
# check mimics wget's -nc (no-clobber) behavior.
_DATA_DIR = 'sample_data/itunes-amazon'
_BASE_URL = ('https://raw.githubusercontent.com/sidharthms/deepmatcher/'
             'master/examples/sample_data/itunes-amazon/')
os.makedirs(_DATA_DIR, exist_ok=True)
for _fname in ('train.csv', 'validation.csv', 'test.csv'):
    _dest = os.path.join(_DATA_DIR, _fname)
    if not os.path.exists(_dest):
        urllib.request.urlretrieve(_BASE_URL + _fname, _dest)

# Parse and preprocess the CSVs into train/validation/test datasets.
# The id columns carry no matching signal, so they are excluded.
train_dataset, validation_dataset, test_dataset = dm.data.process(
    path='sample_data/itunes-amazon',
    train='train.csv',
    validation='validation.csv',
    test='test.csv',
    ignore_columns=('left_id', 'right_id'))

# Default matching model.
model = dm.MatchingModel()
model.initialize(train_dataset)  # Explicitly initialize model.

# SIF attribute summarizer with element-wise-difference comparator.
model = dm.MatchingModel(attr_summarizer='sif', attr_comparator='diff')
model.initialize(train_dataset)  # Explicitly initialize model.

# SIF summarizer with the default comparator for that summarizer.
model = dm.MatchingModel(attr_summarizer='sif')
model.initialize(train_dataset)  # Explicitly initialize model.

# RNN-based attribute summarizer, passed as a module instance.
model = dm.MatchingModel(attr_summarizer=dm.attr_summarizers.RNN())
model.initialize(train_dataset)  # Explicitly initialize model.

# Custom attribute summarizer: sum word embeddings over the sequence
# dimension for both sides. NoMeta strips deepmatcher's metadata wrapper.
my_attr_summarizer_module = dm.modules.NoMeta(dm.modules.Lambda(
    lambda x, y: (x.sum(dim=1), y.sum(dim=1))))

# Wrap the shared module in a factory (lambda) so each attribute reuses it.
model = dm.MatchingModel(
    attr_summarizer=lambda: my_attr_summarizer_module,
    attr_comparator='abs-diff')
model.initialize(train_dataset)  # Explicitly initialize model.

# Built-in comparator by name: concatenation.
model = dm.MatchingModel(attr_comparator='concat')
model.initialize(train_dataset)  # Explicitly initialize model.

# Comparator as a module instance: element-wise multiplication merge.
model = dm.MatchingModel(attr_comparator=dm.modules.Merge('mul'))
model.initialize(train_dataset)  # Explicitly initialize model.
# --- Custom attribute comparators and Hybrid word-level components ---

# Custom comparator: concatenate both summaries with their element-wise
# product along the last dimension.
my_attr_comparator_module = dm.modules.Lambda(
    lambda x, y: torch.cat((x, y, x * y), dim=x.dim() - 1))

model = dm.MatchingModel(attr_comparator=lambda: my_attr_comparator_module)
model.initialize(train_dataset)  # Explicitly initialize model.

# Hybrid summarizer with every word-level stage chosen by name.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(
        word_contextualizer='self-attention',
        word_comparator='general-attention',
        word_aggregator='inv-freq-avg-pool'))
model.initialize(train_dataset)  # Explicitly initialize model.

# Hybrid summarizer with a GRU word contextualizer.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(word_contextualizer='gru'))
model.initialize(train_dataset)  # Explicitly initialize model.

# Example 2: dm.AttrSummarizer arg.
# Contextualizer supplied as a configured module: 2-head self-attention.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(
        word_contextualizer=dm.word_contextualizers.SelfAttention(heads=2)))
model.initialize(train_dataset)  # Explicitly initialize model.


def my_word_contextualizer(input_size):
    """Build a 1-D convolutional word contextualizer.

    Conv1d expects (batch, channels, seq), while deepmatcher feeds
    (batch, seq, channels), hence the transposes on either side.
    """
    return dm.modules.NoMeta(torch.nn.Sequential(
        dm.modules.Lambda(lambda x: x.transpose(1, 2)),
        torch.nn.Conv1d(
            in_channels=input_size,
            out_channels=512,
            kernel_size=3,
            padding=1),
        dm.modules.Lambda(lambda x: x.transpose(1, 2))))


model = dm.MatchingModel(attr_summarizer=dm.attr_summarizers.Hybrid(
    word_contextualizer=my_word_contextualizer))
model.initialize(train_dataset)  # Explicitly initialize model.

# Word comparator chosen by name: dot-product attention.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(word_comparator='dot-attention'))
model.initialize(train_dataset)  # Explicitly initialize model.

# Word comparator as a configured module: 4-head attention with dropout.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(
        word_comparator=dm.word_comparators.Attention(
            heads=4, input_dropout=0.2)))
model.initialize(train_dataset)  # Explicitly initialize model.

# Custom word comparator: attention followed by a 2-layer GRU, chained
# with MultiSequential and wrapped in a factory lambda.
model = dm.MatchingModel(attr_summarizer=dm.attr_summarizers.Hybrid(
    word_comparator=lambda: dm.modules.MultiSequential(
        dm.word_comparators.Attention(),
        dm.modules.RNN(unit_type='gru', layers=2))))
model.initialize(train_dataset)  # Explicitly initialize model.
# --- Word aggregators and classifier variants ---

# Aggregator chosen by name: max pooling over words.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(word_aggregator='max-pool'))
model.initialize(train_dataset)  # Explicitly initialize model.

# Aggregator as a configured module: attention combined with an LSTM
# whose outputs are max-pooled.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(
        word_aggregator=dm.word_aggregators.AttentionWithRNN(
            rnn='lstm', rnn_pool_style='max')))
model.initialize(train_dataset)  # Explicitly initialize model.

# Custom aggregator: concatenate the mean and the max over the sequence
# dimension (the second input is ignored). NoMeta strips metadata.
my_word_aggregator_module = dm.modules.NoMeta(dm.modules.Lambda(
    lambda x, y: torch.cat((x.mean(dim=1), x.max(dim=1)[0]), dim=-1)))

# Next, create the matching model.
model = dm.MatchingModel(
    attr_summarizer=dm.attr_summarizers.Hybrid(
        word_aggregator=lambda: my_word_aggregator_module))
model.initialize(train_dataset)  # Explicitly initialize model.

# Classifiers chosen by style string.
model = dm.MatchingModel(classifier='3-layer-residual-relu')
model.initialize(train_dataset)  # Explicitly initialize model.

model = dm.MatchingModel(classifier='2-layer-highway-tanh')
model.initialize(train_dataset)  # Explicitly initialize model.

# Classifier built from a configured Transform (no non-linearity, wider
# hidden layer).
model = dm.MatchingModel(classifier=dm.Classifier(
    dm.modules.Transform(
        '3-layer-residual', non_linearity=None, hidden_size=512)))
model.initialize(train_dataset)  # Explicitly initialize model.

# Fully custom classifier: highway transform, linear projection to three
# classes, then log-softmax.
my_classifier_module = torch.nn.Sequential(
    dm.modules.Transform('2-layer-highway', hidden_size=300),
    dm.modules.Transform('1-layer', non_linearity=None, output_size=3),
    torch.nn.LogSoftmax(dim=1))

model = dm.MatchingModel(classifier=lambda: my_classifier_module)
model.initialize(train_dataset)  # Explicitly initialize model.