···
On May 27, 2005, at 12:39 PM, Dan Fitzpatrick wrote:
I would like to turn "This is some text" into
["This",
"This is",
"This is some",
"This is some text",
"is",
"is some",
"is some text",
"some",
"some text",
"text"]
-------------------------------------------------
def phrases( string )
pieces = string.split( /\s/ )
out =
pieces.each_index{ |start_index|
(start_index+1).upto( pieces.length ){ |end_index|
out << pieces[start_index...end_index].join(' ')
}
}
out
end
all = phrases( "It's the end of the world as we know it." )
p all
#=> ["It's", "It's the", "It's the end", "It's the end of", "It's the end of the", "It's the end of the world", "It's the end of the world as", "It's the end of the world as we", "It's the end of the world as we know", "It's the end of the world as we know it.", "the", "the end", "the end of", "the end of the", "the end of the world", "the end of the world as", "the end of the world as we", "the end of the world as we know", "the end of the world as we know it.", "end", "end of", "end of the", "end of the world", "end of the world as", "end of the world as we", "end of the world as we know", "end of the world as we know it.", "of", "of the", "of the world", "of the world as", "of the world as we", "of the world as we know", "of the world as we know it.", "the", "the world", "the world as", "the world as we", "the world as we know", "the world as we know it.", "world", "world as", "world as we", "world as we know", "world as we know it.", "as", "as we", "as we know", "as we know it.", "we", "we know", "we know it.", "know", "know it.", "it."]
p all.include?( "end of the world" )
#=> true
p all.include?( "end the world" )
#=> false
Better/Faster
-------------------------------------------------
def phrase_matches( string )
require 'set'
pieces = string.split( /\s/ )
out = Set.new
pieces.each_index{ |start_index|
(start_index+1).upto( pieces.length ){ |end_index|
out.add( pieces[start_index...end_index].join(' ') )
}
}
out
end
all = phrase_matches( "It's the end of the world as we know it." )
p all
p all.include?( "end of the world" )
p all.include?( "end the world" )
Complex-But-Memory-Efficient Answer
-------------------------------------------------
class TrieNode
attr_accessor :children
def initialize
@children = {}
end
def add_path( array )
node = self
array.each{ |item| node = node.children[ item ] ||= TrieNode.new }
end
def includes_path?( array )
node = self
array.each{ |item| return false unless node = node.children[ item ] }
true
end
def to_hier( depth=0 )
tabs = "-"*depth
out = ''
@children.each{ |char,node|
out << "#{tabs}#{char}\n"
out << node.to_hier( depth+1 )
}
out
end
end
class PhraseMatcher
MATCH_WORDS = /[a-z']+/
def initialize( string )
@root = TrieNode.new
pieces = string.downcase.scan( MATCH_WORDS )
pieces.each_index{ |start_index|
(start_index+1).upto( pieces.length ){ |end_index|
@root.add_path( pieces[start_index...end_index] )
}
}
end
def includes_phrase?( string )
@root.includes_path?( string.scan( MATCH_WORDS) )
end
end
sub_phrases = PhraseMatcher.new( "It's the end of the world as we know it, and I feel fine." )
p sub_phrases.includes_phrase?( "end of the world" )
#=> true
p sub_phrases.includes_phrase?( "end the world" )
#=> false
sub_phrases.instance_eval{ puts @root.to_hier }
#=> it
#=> -and
#=> --i
#=> ---feel
#=> ----fine
#=> world
#=> -as
#=> --we
#=> ---know
#=> ----it
#=> -----and
#=> ------i
#=> -------feel
#=> --------fine
#=> and
#=> -i
#=> --feel
#=> ---fine
#=> of
#=> -the
#=> --world
#=> ---as
#=> ----we
#=> -----know
#=> ------it
#=> -------and
#=> --------i
#=> ---------feel
#=> ----------fine
#=> it's
#=> -the
#=> --end
#=> ---of
#=> ----the
#=> -----world
#=> ------as
#=> -------we
#=> --------know
#=> ---------it
#=> ----------and
#=> -----------i
#=> ------------feel
#=> -------------fine
#=> we
#=> -know
#=> --it
#=> ---and
#=> ----i
#=> -----feel
#=> ------fine
#=> know
#=> -it
#=> --and
#=> ---i
#=> ----feel
#=> -----fine
#=> end
#=> -of
#=> --the
#=> ---world
#=> ----as
#=> -----we
#=> ------know
#=> -------it
#=> --------and
#=> ---------i
#=> ----------feel
#=> -----------fine
#=> the
#=> -world
#=> --as
#=> ---we
#=> ----know
#=> -----it
#=> ------and
#=> -------i
#=> --------feel
#=> ---------fine
#=> -end
#=> --of
#=> ---the
#=> ----world
#=> -----as
#=> ------we
#=> -------know
#=> --------it
#=> ---------and
#=> ----------i
#=> -----------feel
#=> ------------fine
#=> as
#=> -we
#=> --know
#=> ---it
#=> ----and
#=> -----i
#=> ------feel
#=> -------fine
#=> fine
#=> i
#=> -feel
#=> --fine
#=> feel
#=> -fine