···
On May 27, 2005, at 12:39 PM, Dan Fitzpatrick wrote:
I would like to turn "This is some text" into
["This",
"This is",
"This is some",
"This is some text",
"is",
"is some",
"is some text",
"some",
"some text",
"text"]
-------------------------------------------------
# Lists every contiguous run of words in +string+, each joined by one space.
#
# Phrases are ordered by their starting word and, for one starting word,
# from shortest to longest; e.g. "a b c" gives
# ["a", "a b", "a b c", "b", "b c", "c"].
#
# string - the text to break up. Words are separated by any run of
#          whitespace; leading and trailing whitespace is ignored.
#
# Returns an Array of Strings, empty when +string+ contains no words.
def phrases(string)
  # Split on whitespace runs so repeated or leading blanks never produce
  # empty "words" (and therefore phrases with stray spaces).
  words = string.split
  words.each_index.flat_map do |start_index|
    (start_index...words.length).map do |end_index|
      words[start_index..end_index].join(' ')
    end
  end
end
# Demo: list every phrase of a sample sentence, then probe the list for a
# phrase that occurs in it and one that does not.
all = phrases("It's the end of the world as we know it.")
p(all)
#=> ["It's", "It's the", "It's the end", "It's the end of", "It's the end of the", "It's the end of the world", "It's the end of the world as", "It's the end of the world as we", "It's the end of the world as we know", "It's the end of the world as we know it.", "the", "the end", "the end of", "the end of the", "the end of the world", "the end of the world as", "the end of the world as we", "the end of the world as we know", "the end of the world as we know it.", "end", "end of", "end of the", "end of the world", "end of the world as", "end of the world as we", "end of the world as we know", "end of the world as we know it.", "of", "of the", "of the world", "of the world as", "of the world as we", "of the world as we know", "of the world as we know it.", "the", "the world", "the world as", "the world as we", "the world as we know", "the world as we know it.", "world", "world as", "world as we", "world as we know", "world as we know it.", "as", "as we", "as we know", "as we know it.", "we", "we know", "we know it.", "know", "know it.", "it."]
p(all.member?("end of the world"))
#=> true
p(all.member?("end the world"))
#=> false
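Better/Faster (collect the phrases in a Set, so duplicates are dropped and membership tests are hash lookups rather than array scans)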
-------------------------------------------------
require 'set'

# Collects every contiguous run of words in +string+ into a Set, each run
# joined by one space, so that phrase membership can be tested with
# Set#include?. Duplicate phrases are stored once.
#
# string - the text to break up. Words are separated by any run of
#          whitespace; leading and trailing whitespace is ignored.
#
# Returns a Set of Strings, empty when +string+ contains no words.
def phrase_matches(string)
  # Split on whitespace runs so repeated or leading blanks never produce
  # empty "words"; otherwise "a  b" would not contain the phrase "a b".
  words = string.split
  found = Set.new
  words.each_index do |start_index|
    (start_index...words.length).each do |end_index|
      found << words[start_index..end_index].join(' ')
    end
  end
  found
end
# Demo: same probes as before, this time against the Set-based result.
all = phrase_matches("It's the end of the world as we know it.")
p(all)
p(all.member?("end of the world"))
p(all.member?("end the world"))
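Complex-But-Memory-Efficient Answer (a trie of words: phrases that begin with the same words share nodes instead of each being stored as a separate string)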
-------------------------------------------------
# One node of a trie. Each node maps an item (for example a word) to the
# child node reached by following that item.
class TrieNode
  # Hash of item => TrieNode for the direct children of this node.
  attr_accessor :children

  def initialize
    @children = {}
  end

  # Walks down from this node along the items of +array+, creating any
  # child nodes that do not exist yet.
  #
  # Returns +array+ itself.
  def add_path(array)
    array.inject(self) do |parent, item|
      parent.children[item] ||= TrieNode.new
    end
    array
  end

  # Reports whether the items of +array+ can be followed, in order, from
  # this node. Any prefix of a path that was added counts as present, and
  # an empty +array+ is always present.
  #
  # Returns true or false.
  def includes_path?(array)
    cursor = self
    # all? stops at the first missing child, so cursor is never nil when
    # the next lookup happens.
    array.all? { |item| cursor = cursor.children[item] }
  end

  # Renders the subtree below this node as text: one line per node, each
  # prefixed with one "-" per level of nesting, children listed directly
  # under their parent.
  #
  # depth - number of "-" characters to put in front of this node's
  #         direct children (default 0).
  #
  # Returns a String; empty when this node has no children.
  def to_hier(depth = 0)
    prefix = '-' * depth
    @children.each_with_object(''.dup) do |(label, child), text|
      text << "#{prefix}#{label}\n"
      text << child.to_hier(depth + 1)
    end
  end
end
# Answers whether a phrase occurs as a contiguous run of words in a source
# text. Matching is case-insensitive and ignores everything that is not a
# word (see MATCH_WORDS), so punctuation and spacing differences between
# the text and the query do not matter.
class PhraseMatcher
  # A word is a run of lowercase ASCII letters and apostrophes. Text is
  # downcased before scanning, so uppercase letters are matched too.
  MATCH_WORDS = /[a-z']+/

  # Builds the matcher for +string+ by indexing its words in a trie.
  #
  # string - the source text whose phrases should be recognised.
  def initialize(string)
    @root = TrieNode.new
    words = tokenize(string)
    # TrieNode#includes_path? accepts any prefix of a stored path, so
    # inserting the word list from each starting position once also covers
    # every shorter phrase that starts at that position.
    words.each_index { |start_index| @root.add_path(words.drop(start_index)) }
  end

  # Reports whether the words of +string+ appear consecutively, in the same
  # order, in the source text. The query is normalised exactly like the
  # source text (downcased, then split into words), so "End of the World"
  # matches a source containing "end of the world".
  #
  # string - the phrase to look for. A phrase containing no words is
  #          trivially reported as present.
  #
  # Returns true or false.
  def includes_phrase?(string)
    @root.includes_path?(tokenize(string))
  end

  private

  # Splits +string+ into its lowercase words.
  def tokenize(string)
    string.downcase.scan(MATCH_WORDS)
  end
end
# Demo: index a sentence, probe it for a phrase that occurs in it and one
# that does not, then dump the trie built for it.
sub_phrases = PhraseMatcher.new("It's the end of the world as we know it, and I feel fine.")
p(sub_phrases.includes_phrase?("end of the world"))
#=> true
p(sub_phrases.includes_phrase?("end the world"))
#=> false
# @root has no public reader, so fetch it reflectively for printing.
puts(sub_phrases.instance_variable_get(:@root).to_hier)
#=> it
#=> -and
#=> --i
#=> ---feel
#=> ----fine
#=> world
#=> -as
#=> --we
#=> ---know
#=> ----it
#=> -----and
#=> ------i
#=> -------feel
#=> --------fine
#=> and
#=> -i
#=> --feel
#=> ---fine
#=> of
#=> -the
#=> --world
#=> ---as
#=> ----we
#=> -----know
#=> ------it
#=> -------and
#=> --------i
#=> ---------feel
#=> ----------fine
#=> it's
#=> -the
#=> --end
#=> ---of
#=> ----the
#=> -----world
#=> ------as
#=> -------we
#=> --------know
#=> ---------it
#=> ----------and
#=> -----------i
#=> ------------feel
#=> -------------fine
#=> we
#=> -know
#=> --it
#=> ---and
#=> ----i
#=> -----feel
#=> ------fine
#=> know
#=> -it
#=> --and
#=> ---i
#=> ----feel
#=> -----fine
#=> end
#=> -of
#=> --the
#=> ---world
#=> ----as
#=> -----we
#=> ------know
#=> -------it
#=> --------and
#=> ---------i
#=> ----------feel
#=> -----------fine
#=> the
#=> -world
#=> --as
#=> ---we
#=> ----know
#=> -----it
#=> ------and
#=> -------i
#=> --------feel
#=> ---------fine
#=> -end
#=> --of
#=> ---the
#=> ----world
#=> -----as
#=> ------we
#=> -------know
#=> --------it
#=> ---------and
#=> ----------i
#=> -----------feel
#=> ------------fine
#=> as
#=> -we
#=> --know
#=> ---it
#=> ----and
#=> -----i
#=> ------feel
#=> -------fine
#=> fine
#=> i
#=> -feel
#=> --fine
#=> feel
#=> -fine